def testStopSectionTimeline(self):
        # Segments the test user's data into trips and sections, then walks
        # the stop/section timeline of every raw trip in the day and checks
        # that neighbouring elements alternate in type and reference each
        # other through starting_section / end_stop.
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)
        day_timeline = esdt.get_raw_timeline(
            self.testUUID, self.day_start_ts, self.day_end_ts)

        for idx, entry in enumerate(day_timeline):
            logging.debug("%s: %s" % (idx, type(entry)))
            is_raw_trip = self.get_type(entry) == ecwrt.Rawtrip
            if not is_raw_trip:
                # Only raw trips are drilled into; everything else is skipped.
                continue

            trip_timeline = esdtq.get_raw_timeline_for_trip(
                self.testUUID, entry.get_id())
            logging.debug("Got timeline %s for trip %s" %
                          (trip_timeline, entry.start_fmt_time))

            last_kind = None
            last_item = None
            n_checked = 0
            # Pre-bound so the final count comparison also works when the
            # trip timeline is empty.
            pos = 0
            for pos, item in enumerate(trip_timeline):
                logging.debug("curr_element = %s" % item)
                kind = self.get_type(item)
                if last_kind is not None:
                    n_checked += 1
                    # Two adjacent elements may never share a type.
                    self.assertNotEqual(last_kind, kind)
                    if last_kind == ecws.Stop:
                        self.assertEqual(last_item.starting_section,
                                         item.get_id())
                    else:
                        self.assertEqual(last_kind, ecwsc.Section)
                        self.assertEqual(last_item.end_stop,
                                         item.get_id())
                last_kind, last_item = kind, item
            # Every element after the first must have been compared with its
            # predecessor, i.e. the number of checks equals the last index.
            self.assertEqual(n_checked, pos)
 def testE2E(self):
     # Runs trip and section segmentation, builds the tour model from the
     # raw user data and checks that the stored model contains at least one
     # common trip and one common place.
     eaist.segment_current_trips(self.testUUID)
     eaiss.segment_current_sections(self.testUUID)
     esdtmq.make_tour_model_from_raw_user_data(self.testUUID)
     tm = esdtmq.get_tour_model(self.testUUID)
     # assertGreater reports the actual size on failure, whereas
     # assertTrue(len(...) > 0) only reports "False is not true".
     self.assertGreater(len(tm["common_trips"]), 0)
     self.assertGreater(len(tm["common_places"]), 0)
    def testTripGeojson(self):
        # Runs segmentation, smoothing and clean/resample for the test user,
        # then checks the trip counts of the raw and cleaned timelines and
        # the shape of the geojson generated from the cleaned timeline.
        #
        # Uses assertEqual throughout: the assertEquals alias is deprecated
        # and no longer exists in Python 3.12+.
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)
        eaicl.filter_current_sections(self.testUUID)
        # Both timeline queries use the same 24-hour (86400 s) window.
        tl = esdtl.get_raw_timeline(self.testUUID, 1440658800, 1440745200)
        self.assertEqual(len(tl.trips), 8)

        eaicr.clean_and_resample(self.testUUID)

        tl = esdtl.get_cleaned_timeline(self.testUUID, 1440658800, 1440745200)
        tl.fill_start_end_places()

        created_trips = tl.trips
        # One trip fewer than in the raw timeline is expected after cleaning.
        self.assertEqual(len(created_trips), 7)

        trip_geojson = gjfc.trip_to_geojson(created_trips[0], tl)
        logging.debug("first trip_geojson = %s" %
                      bju.dumps(trip_geojson, indent=4))

        self.assertEqual(trip_geojson.type, "FeatureCollection")
        self.assertEqual(trip_geojson.properties["feature_type"], "trip")
        self.assertEqual(len(trip_geojson.features), 5)

        # The per-day geojson is checked through its last entry.
        day_geojson = gjfc.get_geojson_for_timeline(self.testUUID, tl)
        self.assertEqual(len(day_geojson), 7)
        self.assertEqual(day_geojson[-1].type, "FeatureCollection")
        self.assertEqual(day_geojson[-1].properties["feature_type"], "trip")
        self.assertEqual(len(day_geojson[-1].features), 5)
    def testTripGeojson(self):
        # Runs segmentation, smoothing, clean/resample and mode prediction
        # for the test user, then checks the trip counts of the raw and
        # cleaned timelines and the shape of the generated geojson.
        #
        # Uses assertEqual throughout: the assertEquals alias is deprecated
        # and no longer exists in Python 3.12+.
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)
        eaicl.filter_current_sections(self.testUUID)
        # Both timeline queries use the same 24-hour (86400 s) window.
        tl = esdtl.get_raw_timeline(self.testUUID, 1440658800, 1440745200)
        self.assertEqual(len(tl.trips), 9)

        eaicr.clean_and_resample(self.testUUID)
        eacimp.predict_mode(self.testUUID)

        tl = esdtl.get_cleaned_timeline(self.testUUID, 1440658800, 1440745200)
        tl.fill_start_end_places()

        created_trips = tl.trips
        self.assertEqual(len(created_trips), 9)

        trip_geojson = gjfc.trip_to_geojson(created_trips[0], tl)
        logging.debug("first trip_geojson = %s" % bju.dumps(trip_geojson, indent=4))

        self.assertEqual(trip_geojson.type, "FeatureCollection")
        self.assertEqual(trip_geojson.properties["feature_type"], "trip")
        self.assertEqual(len(trip_geojson.features), 5)

        # NOTE(review): 9 cleaned trips but 8 geojson entries are expected
        # here — confirm which trip is meant to be dropped by the conversion.
        day_geojson = gjfc.get_geojson_for_timeline(self.testUUID, tl)
        self.assertEqual(len(day_geojson), 8)
        self.assertEqual(day_geojson[-1].type, "FeatureCollection")
        self.assertEqual(day_geojson[-1].properties["feature_type"], "trip")
        self.assertEqual(len(day_geojson[-1].features), 5)
Пример #5
0
    def testStopSectionTimeline(self):
        # Segments the test user's data, then verifies for every raw trip of
        # the day that its timeline alternates between stops and sections
        # and that each element references the one that follows it.

        def check_link(before_kind, before, after_kind, after):
            # Adjacent elements must differ in type; a stop must name the
            # following element as its starting_section, and anything else
            # must be a section naming the following element as its end_stop.
            self.assertNotEqual(before_kind, after_kind)
            if before_kind == ecws.Stop:
                self.assertEqual(before.starting_section, after.get_id())
            else:
                self.assertEqual(before_kind, ecwsc.Section)
                self.assertEqual(before.end_stop, after.get_id())

        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)
        tl = esdt.get_raw_timeline(self.testUUID, self.day_start_ts, self.day_end_ts)

        for i, element in enumerate(tl):
            logging.debug("%s: %s" % (i, type(element)))
            if self.get_type(element) == ecwrt.Rawtrip:
                trip_tl = esdtq.get_raw_timeline_for_trip(self.testUUID, element.get_id())
                logging.debug("Got timeline %s for trip %s" % (trip_tl, element.start_fmt_time))
                before_kind = None
                before = None
                links_checked = 0
                # Pre-bound so the count comparison below is valid even for
                # an empty trip timeline.
                last_index = 0
                for last_index, after in enumerate(trip_tl):
                    logging.debug("curr_element = %s" % after)
                    after_kind = self.get_type(after)
                    if before_kind is not None:
                        links_checked += 1
                        check_link(before_kind, before, after_kind, after)
                    before_kind = after_kind
                    before = after
                # One link per element after the first.
                self.assertEqual(links_checked, last_index)
 def setUp(self):
     # Resets state, loads the shankari_2015-aug-21 real example and runs
     # accuracy filtering, the filter-field move, and trip and section
     # segmentation, so tests start from already-segmented data.
     self.clearRelatedDb()
     # Clear the trip and section stores (old and new), in this order.
     for get_store in (edb.get_trip_db, edb.get_section_db,
                       edb.get_trip_new_db, edb.get_section_new_db):
         get_store().remove()
     etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-21")
     user_id = self.testUUID
     eaicf.filter_accuracy(user_id)
     estfm.move_all_filters_to_data()
     eaist.segment_current_trips(user_id)
     eaiss.segment_current_sections(user_id)
Пример #7
0
    def testTripGeojson(self):
        # Runs trip and section segmentation, loads the timeline for a
        # 24-hour window, checks the number of trips and logs the geojson
        # generated for the first one.
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)

        tl = esdtl.get_timeline(self.testUUID, 1440658800, 1440745200)
        tl.fill_start_end_places()

        created_trips = tl.trips
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12+.
        self.assertEqual(len(created_trips), 8)

        # The geojson is only logged; its content is not asserted here.
        trip_geojson = gjfc.trip_to_geojson(created_trips[0], tl)
        logging.debug("trip_geojson = %s" % gj.dumps(trip_geojson, indent=4))
Пример #8
0
    def testRemoveAllOutliers(self):
        # Marks every filtered location of every smoothed raw section as
        # deleted, then checks that clean_and_resample still runs and
        # produces the expected cleaned trips, sections and stops.
        etc.setupRealExample(
            self, "emission/tests/data/real_examples/shankari_2016-06-20")
        self.ts = esta.TimeSeries.get_time_series(self.testUUID)
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)
        eaicl.filter_current_sections(self.testUUID)

        # Wrap every raw section document in an Entry.
        raw_section_docs = self.ts.find_entries([esda.RAW_SECTION_KEY],
                                                time_query=None)
        raw_sections = [ecwe.Entry(doc) for doc in raw_section_docs]

        for raw_section in raw_sections:
            smoothing_doc = self.ts.get_entry_at_ts(
                "analysis/smoothing", "data.section", raw_section.get_id())
            if smoothing_doc is None:
                # No smoothing result stored for this section: nothing to edit.
                continue
            logging.debug("Found smoothing result for section %s" %
                          raw_section.get_id())
            # Declare every filtered location in the section's time range
            # to be a deleted point, and persist that.
            section_tq = esda.get_time_query_for_trip_like(
                esda.RAW_SECTION_KEY, raw_section.get_id())
            location_df = self.ts.get_data_df("background/filtered_location",
                                              section_tq)
            all_point_ids = location_df["_id"].tolist()
            smoothing_doc["data"]["deleted_points"] = all_point_ids
            self.ts.update(ecwe.Entry(smoothing_doc))

        # The main requirement is that this call does not crash.
        eaicr.clean_and_resample(self.testUUID)

        # Most trips end up with zero length, but one survives because its
        # stop has a non-zero length, so exactly one cleaned trip remains.
        trips_df = self.ts.get_data_df(esda.CLEANED_TRIP_KEY,
                                       time_query=None)
        self.assertEqual(len(trips_df), 1)

        # Sections are not squished, and stops/sections are only stored for
        # non-squished trips; the surviving trip has two sections and one stop.
        sections_df = self.ts.get_data_df(esda.CLEANED_SECTION_KEY,
                                          time_query=None)
        self.assertEqual(len(sections_df), 2)
        self.assertEqual(sections_df.distance.tolist(), [0, 0])

        stops_df = self.ts.get_data_df(esda.CLEANED_STOP_KEY,
                                       time_query=None)
        self.assertEqual(len(stops_df), 1)
        self.assertAlmostEqual(stops_df.distance[0], 3252, places=0)
Пример #9
0
 def testCreatePlace(self):
     # Loads the shankari_2015-aug-27 example, runs filtering and
     # segmentation, creates common places from the eamtcp.main output and
     # checks that every stored common place has a location and successors.
     etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
     user_id = self.testUUID
     eaicf.filter_accuracy(user_id)
     estfm.move_all_filters_to_data()
     eaist.segment_current_trips(user_id)
     eaiss.segment_current_sections(user_id)
     esdcpq.create_places(eamtcp.main(user_id), user_id)
     # Convert every stored record first, then validate the converted places.
     common_places = [esdcpq.make_common_place(record)
                      for record in esdcpq.get_all_common_places_for_user(user_id)]
     for common_place in common_places:
         self.assertIsNotNone(common_place.location)
         self.assertIsNotNone(common_place["successors"])
    def testSegmentationWrapperWithAutoTrip(self):
        # Runs trip and section segmentation for the test user and logs the
        # stops and sections retrieved for each trip in the queried window.
        # There are no assertions: the test passes as long as nothing raises.
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)

        trip_query = enua.UserCache.TimeQuery("start_ts", 1440658800, 1440745200)
        for trip in esdt.get_trips(self.testUUID, trip_query):
            # Fetch both collections before logging either of them.
            trip_stops = esdt.get_stops_for_trip(self.testUUID, trip.get_id())
            trip_sections = esdt.get_sections_for_trip(self.testUUID, trip.get_id())

            for idx, stop in enumerate(trip_stops):
                logging.info("Retrieved stops %s: %s -> %s" %
                             (idx, stop.enter_fmt_time, stop.exit_fmt_time))
            for idx, section in enumerate(trip_sections):
                logging.info("Retrieved sections %s: %s -> %s" %
                             (idx, section.start_fmt_time, section.end_fmt_time))
Пример #11
0
    def testSegmentationWrapperIOS(self):
        # Segments the iOS user's data into trips, then queries the raw
        # places and raw trips stored for a fixed window and verifies their
        # counts, their mutual links and the trip durations.
        eaist.segment_current_trips(self.iosUUID)

        window_start, window_end = 1446796800, 1446847600
        # Places are selected by enter time, trips by start time.
        place_query = estt.TimeQuery("data.enter_ts", window_start, window_end)
        places = esda.get_entries(esda.RAW_PLACE_KEY, self.iosUUID, place_query)

        trip_query = estt.TimeQuery("data.start_ts", window_start, window_end)
        trips = esda.get_entries(esda.RAW_TRIP_KEY, self.iosUUID, trip_query)

        for idx, place in enumerate(places):
            logging.debug("Retrieved places %s: %s -> %s" %
                          (idx, place.data.enter_fmt_time, place.data.exit_fmt_time))
        for idx, trip in enumerate(trips):
            logging.debug("Retrieved trips %s: %s -> %s" %
                          (idx, trip.data.start_fmt_time, trip.data.end_fmt_time))

        # Two places and two trips are expected inside the window.
        # NOTE(review): earlier comments here mentioned 4 places (minus the
        # chain start) and skipping a leading dummy trip; neither matches the
        # counts or the indices used below — confirm the intended values.
        self.assertEqual(len(places), 2)
        self.assertEqual(len(trips), 2)

        # The first retrieved place must sit between the first two trips.
        first_trip = trips[0]
        second_trip = trips[1]
        middle_place = places[0]

        self.assertEqual(first_trip.data.end_place, middle_place.get_id())
        self.assertEqual(second_trip.data.start_place, middle_place.get_id())
        self.assertEqual(middle_place.data.ending_trip, first_trip.get_id())
        self.assertEqual(middle_place.data.starting_trip, second_trip.get_id())

        # Expected durations in seconds: 14m41s and 1h50m56s.
        self.assertEqual(round(first_trip.data.duration), 14 * 60 + 41)
        self.assertEqual(round(second_trip.data.duration),
                         1 * 60 * 60 + 50 * 60 + 56)

        self.assertIsNotNone(middle_place.data.location)
Пример #12
0
def runIntakePipeline(uuid):
    # Runs the intake stages for `uuid` one after another: accuracy
    # filtering, trip segmentation, section segmentation, section filtering
    # (location smoothing module), clean-and-resample, and mode prediction.
    # Nothing is returned; each stage is called only with the uuid.
    #
    # Move these imports here so that we don't inadvertently load the modules,
    # and any related config modules, before we want to
    import emission.analysis.intake.cleaning.filter_accuracy as eaicf
    # NOTE(review): estfm is imported but never called below — confirm
    # whether the import is kept on purpose (e.g. for load-time effects).
    import emission.storage.timeseries.format_hacks.move_filter_field as estfm
    import emission.analysis.intake.segmentation.trip_segmentation as eaist
    import emission.analysis.intake.segmentation.section_segmentation as eaiss
    import emission.analysis.intake.cleaning.location_smoothing as eaicl
    import emission.analysis.intake.cleaning.clean_and_resample as eaicr
    import emission.analysis.classification.inference.mode.pipeline as eacimp

    # The stages run strictly in this order.
    eaicf.filter_accuracy(uuid)
    eaist.segment_current_trips(uuid)
    eaiss.segment_current_sections(uuid)
    eaicl.filter_current_sections(uuid)
    eaicr.clean_and_resample(uuid)
    eacimp.predict_mode(uuid)
Пример #13
0
def runIntakePipeline(uuid):
    # Executes the intake pipeline for a single user id. The stages called,
    # in order, are: filter_accuracy, segment_current_trips,
    # segment_current_sections, filter_current_sections, clean_and_resample
    # and predict_mode. The function has no return value.
    #
    # Move these imports here so that we don't inadvertently load the modules,
    # and any related config modules, before we want to
    import emission.analysis.intake.cleaning.filter_accuracy as eaicf
    # NOTE(review): the estfm alias is unused in this function — verify
    # whether the import itself is still required.
    import emission.storage.timeseries.format_hacks.move_filter_field as estfm
    import emission.analysis.intake.segmentation.trip_segmentation as eaist
    import emission.analysis.intake.segmentation.section_segmentation as eaiss
    import emission.analysis.intake.cleaning.location_smoothing as eaicl
    import emission.analysis.intake.cleaning.clean_and_resample as eaicr
    import emission.analysis.classification.inference.mode.pipeline as eacimp

    # Each stage receives only the uuid.
    eaicf.filter_accuracy(uuid)
    eaist.segment_current_trips(uuid)
    eaiss.segment_current_sections(uuid)
    eaicl.filter_current_sections(uuid)
    eaicr.clean_and_resample(uuid)
    eacimp.predict_mode(uuid)
    def testCreateFromData(self):
        # Loads the shankari_2015-aug-27 example, runs accuracy filtering and
        # segmentation, then validates every common trip stored for the user:
        # start/end locations, a non-empty trip list whose members resolve
        # to Trip objects, a positive probability sum and the user id.
        #
        # NOTE(review): nothing in this test creates common trips, so the
        # validation loop may run zero times — confirm they are built
        # somewhere before this point.
        etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
        eaicf.filter_accuracy(self.testUUID)
        estfm.move_all_filters_to_data()
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)

        trips = esdctp.get_all_common_trips_for_user(self.testUUID)
        trips_list = [esdctp.make_common_trip_from_json(p) for p in trips]
        for trip in trips_list:
            self.assertIsNotNone(trip.start_loc)
            self.assertIsNotNone(trip.end_loc)
            # assertGreater shows the offending value when the check fails,
            # unlike assertTrue(... > 0).
            self.assertGreater(len(trip["trips"]), 0)
            # Spot-check one randomly chosen member trip.
            rand_id = random.choice(trip["trips"])
            self.assertEqual(type(esdtq.get_trip(rand_id)), ecwt.Trip)
            # "probabilites" (sic) is the attribute name used by the wrapper.
            self.assertGreater(trip.probabilites.sum(), 0)
            self.assertEqual(str(trip.user_id), "test2")
Пример #15
0
    def testIOSSegmentationWrapperWithAutoTrip(self):
        # Runs trip and section segmentation for the iOS user and compares
        # the number of raw sections and raw stops of every raw trip in the
        # queried window against recorded values.
        eaist.segment_current_trips(self.iosUUID)
        eaiss.segment_current_sections(self.iosUUID)

        trip_query = estt.TimeQuery("data.start_ts", 1446700000, 1446900000)
        raw_trips = esda.get_entries(esda.RAW_TRIP_KEY, self.iosUUID,
                                     trip_query)

        self.assertEqual(len(raw_trips), 2)
        logging.debug("created trips = %s" % raw_trips)

        # One (section count, stop count) pair per trip, in trip order.
        counts_per_trip = []
        for raw_trip in raw_trips:
            n_sections = len(esdt.get_raw_sections_for_trip(self.iosUUID, raw_trip.get_id()))
            n_stops = len(esdt.get_raw_stops_for_trip(self.iosUUID, raw_trip.get_id()))
            counts_per_trip.append((n_sections, n_stops))
        logging.debug(counts_per_trip)
        self.assertEqual(len(counts_per_trip), len(raw_trips))
        # The expected pairs were taken from the debug output logged above.
        expected_counts = [(0, 0), (11, 10)]
        self.assertEqual(counts_per_trip, expected_counts)
Пример #16
0
    def testSegmentationWrapperWithAutoTrip(self):
        # Runs trip and section segmentation for the Android user and
        # compares the number of raw sections and raw stops of every raw
        # trip in the queried window against recorded values.
        eaist.segment_current_trips(self.androidUUID)
        eaiss.segment_current_sections(self.androidUUID)

        trip_query = estt.TimeQuery("data.start_ts", 1440658800, 1440745200)
        raw_trips = esda.get_entries(esda.RAW_TRIP_KEY, self.androidUUID,
                                     trip_query)

        self.assertEqual(len(raw_trips), 8)

        # One (section count, stop count) pair per trip, in trip order.
        counts_per_trip = []
        for raw_trip in raw_trips:
            n_sections = len(esdt.get_raw_sections_for_trip(self.androidUUID, raw_trip.get_id()))
            n_stops = len(esdt.get_raw_stops_for_trip(self.androidUUID, raw_trip.get_id()))
            counts_per_trip.append((n_sections, n_stops))
        logging.debug(counts_per_trip)
        self.assertEqual(len(counts_per_trip), len(raw_trips))
        # The expected pairs were taken from the debug output logged above.
        expected_counts = [(2, 1), (1, 0), (2, 1), (2, 1), (1, 0), (2, 1),
                           (4, 3), (2, 1)]
        self.assertEqual(counts_per_trip, expected_counts)
    def testSegmentationWrapperWithAutoTrip(self):
        # Segments the Android user's data into trips and sections, then
        # checks how many raw sections and raw stops each raw trip in the
        # queried window ended up with.
        eaist.segment_current_trips(self.androidUUID)
        eaiss.segment_current_sections(self.androidUUID)

        window = estt.TimeQuery("data.start_ts", 1440658800, 1440745200)
        trips_in_window = esda.get_entries(esda.RAW_TRIP_KEY,
                                           self.androidUUID, window)

        self.assertEqual(len(trips_in_window), 8)

        # Collect a (sections, stops) count pair for every trip, in order.
        section_stop_counts = []
        for entry in trips_in_window:
            sections = esdt.get_raw_sections_for_trip(self.androidUUID, entry.get_id())
            section_total = len(sections)
            stops = esdt.get_raw_stops_for_trip(self.androidUUID, entry.get_id())
            stop_total = len(stops)
            section_stop_counts.append((section_total, stop_total))
        logging.debug(section_stop_counts)
        self.assertEqual(len(section_stop_counts), len(trips_in_window))
        # Reference values were copied from the debug output logged above.
        self.assertEqual(
            section_stop_counts,
            [(2, 1), (1, 0), (2, 1), (2, 1), (1, 0), (2, 1), (4, 3), (2, 1)])
Пример #18
0
    def testSegmentationWrapperWithAutoTrip(self):
        # Segments the test user's data, then logs the enter/exit times of
        # the stops and the start/end times of the sections of every trip
        # in the queried window. Nothing is asserted; the test only requires
        # that the calls complete.
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)

        window = enua.UserCache.TimeQuery("start_ts", 1440658800, 1440745200)
        trips_in_window = esdt.get_trips(self.testUUID, window)

        for current_trip in trips_in_window:
            # Both lookups happen before any logging for this trip.
            stops = esdt.get_stops_for_trip(self.testUUID, current_trip.get_id())
            sections = esdt.get_sections_for_trip(self.testUUID, current_trip.get_id())

            for n, a_stop in enumerate(stops):
                stop_fields = (n, a_stop.enter_fmt_time, a_stop.exit_fmt_time)
                logging.info("Retrieved stops %s: %s -> %s" % stop_fields)
            for n, a_section in enumerate(sections):
                section_fields = (n, a_section.start_fmt_time, a_section.end_fmt_time)
                logging.info("Retrieved sections %s: %s -> %s" % section_fields)
    def testIOSSegmentationWrapperWithAutoTrip(self):
        # Segments the iOS user's data into trips and sections, then checks
        # how many raw sections and raw stops each raw trip in the queried
        # window ended up with.
        eaist.segment_current_trips(self.iosUUID)
        eaiss.segment_current_sections(self.iosUUID)

        window = estt.TimeQuery("data.start_ts", 1446700000, 1446900000)
        trips_in_window = esda.get_entries(esda.RAW_TRIP_KEY, self.iosUUID,
                                           window)

        self.assertEqual(len(trips_in_window), 2)
        logging.debug("created trips = %s" % trips_in_window)

        # Collect a (sections, stops) count pair for every trip, in order.
        section_stop_counts = []
        for entry in trips_in_window:
            sections = esdt.get_raw_sections_for_trip(self.iosUUID, entry.get_id())
            section_total = len(sections)
            stops = esdt.get_raw_stops_for_trip(self.iosUUID, entry.get_id())
            stop_total = len(stops)
            section_stop_counts.append((section_total, stop_total))
        logging.debug(section_stop_counts)
        self.assertEqual(len(section_stop_counts), len(trips_in_window))
        # Reference values were copied from the debug output logged above.
        self.assertEqual(section_stop_counts, [(0, 0), (11, 10)])
    def testCreateFromData(self):
        # Loads the shankari_2015-aug-27 example, runs accuracy filtering and
        # segmentation, then validates every common trip stored for the user:
        # start/end locations, a non-empty trip list whose members resolve
        # to Trip objects, a positive probability sum and the user id.
        #
        # NOTE(review): nothing in this test creates common trips, so the
        # validation loop may run zero times — confirm they are built
        # somewhere before this point.
        etc.setupRealExample(
            self, "emission/tests/data/real_examples/shankari_2015-aug-27")
        eaicf.filter_accuracy(self.testUUID)
        estfm.move_all_filters_to_data()
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)

        trips = esdctp.get_all_common_trips_for_user(self.testUUID)
        trips_list = [esdctp.make_common_trip_from_json(p) for p in trips]
        for trip in trips_list:
            self.assertIsNotNone(trip.start_loc)
            self.assertIsNotNone(trip.end_loc)
            # assertGreater shows the offending value when the check fails,
            # unlike assertTrue(... > 0).
            self.assertGreater(len(trip["trips"]), 0)
            # Spot-check one randomly chosen member trip.
            rand_id = random.choice(trip["trips"])
            self.assertEqual(type(esdtq.get_trip(rand_id)), ecwt.Trip)
            # "probabilites" (sic) is the attribute name used by the wrapper.
            self.assertGreater(trip.probabilites.sum(), 0)
            self.assertEqual(str(trip.user_id), "test2")
    def testSegmentationWrapperIOS(self):
        # Segments the iOS user's data into trips, then queries the raw
        # places and raw trips stored for a fixed window and verifies their
        # counts, their mutual links and the trip durations.
        eaist.segment_current_trips(self.iosUUID)

        window_start, window_end = 1446796800, 1446847600
        # Places are selected by enter time, trips by start time.
        place_query = estt.TimeQuery("data.enter_ts", window_start, window_end)
        places = esda.get_entries(esda.RAW_PLACE_KEY, self.iosUUID, place_query)

        trip_query = estt.TimeQuery("data.start_ts", window_start, window_end)
        trips = esda.get_entries(esda.RAW_TRIP_KEY, self.iosUUID, trip_query)

        for idx, place in enumerate(places):
            logging.debug("Retrieved places %s: %s -> %s" % (idx, place.data.enter_fmt_time, place.data.exit_fmt_time))
        for idx, trip in enumerate(trips):
            logging.debug("Retrieved trips %s: %s -> %s" % (idx, trip.data.start_fmt_time, trip.data.end_fmt_time))

        # Four places are expected overall, but the one at the start of the
        # chain has no enter time, so the enter_ts query cannot return it;
        # three places and three trips remain.
        self.assertEqual(len(places), 3)
        self.assertEqual(len(trips), 3)

        # The first retrieved trip is a dummy trip, so the linkage is checked
        # on the second and third trips and the place between them.
        earlier_trip = trips[1]
        later_trip = trips[2]
        shared_place = places[1]

        self.assertEqual(earlier_trip.data.end_place, shared_place.get_id())
        self.assertEqual(later_trip.data.start_place, shared_place.get_id())
        self.assertEqual(shared_place.data.ending_trip, earlier_trip.get_id())
        self.assertEqual(shared_place.data.starting_trip, later_trip.get_id())

        # Expected durations in seconds: 58m51s and 38m57s.
        self.assertEqual(round(earlier_trip.data.duration), 58 * 60 + 51)
        self.assertEqual(round(later_trip.data.duration), 38 * 60 + 57)

        self.assertIsNotNone(shared_place.data.location)
Пример #22
0
    def testPlaceTripTimeline(self):
        # Segments the test user's data into trips and checks that the day's
        # timeline alternates between places and trips, with each element
        # referencing the next one (place.starting_trip / trip.end_place).
        eaist.segment_current_trips(self.testUUID)
        tl = esdt.get_timeline(self.testUUID, self.day_start_ts, self.day_end_ts)

        prev_type = None
        prev_element = None
        checked_count = 0
        # Pre-bind the loop index: with an empty timeline the loop body never
        # runs and the final assertion would otherwise raise NameError
        # instead of comparing 0 with 0.
        i = 0
        for i, curr_element in enumerate(tl):
            # logging.debug("%s: %s" % (i, curr_element))
            curr_type = self.get_type(curr_element)
            if prev_type is not None:
                checked_count = checked_count + 1
                # Two adjacent elements may never share a type.
                self.assertNotEqual(prev_type, curr_type)
                if prev_type == ecwp.Place:
                    self.assertEqual(prev_element.starting_trip, curr_element.get_id())
                else:
                    self.assertEqual(prev_type, ecwt.Trip)
                    self.assertEqual(prev_element.end_place, curr_element.get_id())
            prev_type = curr_type
            prev_element = curr_element
        # Every element after the first must have been checked against its
        # predecessor, so the number of checks equals the last index.
        self.assertEqual(checked_count, i)
    def testSegmentationWrapperWithAutoTrip(self):
        # Runs trip and section segmentation for the test user and logs the
        # raw stops and raw sections of each raw trip in the queried window.
        # There are no assertions: the test passes as long as nothing raises.
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)

        trip_query = estt.TimeQuery("data.start_ts", 1440658800, 1440745200)
        raw_trips = esda.get_entries(esda.RAW_TRIP_KEY, self.testUUID,
                                     trip_query)

        for raw_trip in raw_trips:
            logging.debug("current trip is %s" % raw_trip)
            # Fetch both collections before logging either of them.
            trip_stops = esdt.get_raw_stops_for_trip(self.testUUID, raw_trip.get_id())
            trip_sections = esdt.get_raw_sections_for_trip(self.testUUID, raw_trip.get_id())

            for idx, stop in enumerate(trip_stops):
                logging.info("Retrieved stops %s: %s -> %s" %
                             (idx, stop.data.enter_fmt_time, stop.data.exit_fmt_time))
            for idx, section in enumerate(trip_sections):
                logging.info("Retrieved sections %s: %s -> %s" %
                             (idx, section.data.start_fmt_time, section.data.end_fmt_time))
    def testSegmentationWrapperAndroid(self):
        # Segments the Android user's data into trips, then queries the
        # places and trips stored for a 24-hour window and verifies their
        # counts, their mutual links and the trip durations.
        eaist.segment_current_trips(self.androidUUID)

        window_start, window_end = 1440658800, 1440745200
        # Places are selected by enter time, trips by start time.
        place_query = enua.UserCache.TimeQuery("enter_ts", window_start, window_end)
        places = esdp.get_places(self.androidUUID, place_query)

        trip_query = enua.UserCache.TimeQuery("start_ts", window_start, window_end)
        trips = esdt.get_trips(self.androidUUID, trip_query)

        for idx, place in enumerate(places):
            logging.debug("Retrieved places %s: %s -> %s" %
                          (idx, place.enter_fmt_time, place.exit_fmt_time))
        for idx, trip in enumerate(trips):
            logging.debug("Retrieved trips %s: %s -> %s" %
                          (idx, trip.start_fmt_time, trip.end_fmt_time))

        # Nine places are expected overall, but the one at the start of the
        # chain has no enter time, so the enter_ts query cannot return it;
        # eight places and eight trips remain.
        self.assertEqual(len(places), 8)
        self.assertEqual(len(trips), 8)

        # The first retrieved place (not the true first place, which the
        # query skips) must sit between the first two trips.
        first_trip = trips[0]
        second_trip = trips[1]
        middle_place = places[0]

        self.assertEqual(first_trip.end_place, middle_place.get_id())
        self.assertEqual(second_trip.start_place, middle_place.get_id())
        self.assertEqual(middle_place.ending_trip, first_trip.get_id())
        self.assertEqual(middle_place.starting_trip, second_trip.get_id())

        # Expected durations in seconds: 11m09s and 6m54s.
        self.assertEqual(round(first_trip.duration), 11 * 60 + 9)
        self.assertEqual(round(second_trip.duration), 6 * 60 + 54)

        self.assertIsNotNone(middle_place.location)
    def testRemoveAllOutliers(self):
        # Flags every filtered location of each smoothed raw section as a
        # deleted point, then verifies that clean_and_resample copes with it
        # and yields the expected cleaned trips, sections and stops.
        etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2016-06-20")
        self.ts = esta.TimeSeries.get_time_series(self.testUUID)
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)
        eaicl.filter_current_sections(self.testUUID)

        # Materialise all raw sections as Entry objects up front.
        all_sections = []
        for section_doc in self.ts.find_entries([esda.RAW_SECTION_KEY], time_query=None):
            all_sections.append(ecwe.Entry(section_doc))

        for one_section in all_sections:
            smoothing_result = self.ts.get_entry_at_ts("analysis/smoothing",
                                                       "data.section",
                                                       one_section.get_id())
            if smoothing_result is None:
                # Sections without a smoothing result are left alone.
                continue
            logging.debug("Found smoothing result for section %s" % one_section.get_id())
            # Overwrite deleted_points with the ids of all filtered
            # locations in the section's time range, then store the result.
            time_range = esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY, one_section.get_id())
            locations = self.ts.get_data_df("background/filtered_location", time_range)
            smoothing_result["data"]["deleted_points"] = locations["_id"].tolist()
            self.ts.update(ecwe.Entry(smoothing_result))

        # What matters most is that this does not crash.
        eaicr.clean_and_resample(self.testUUID)

        # Nearly all trips collapse to zero length; one is kept because its
        # stop length is non-zero, leaving a single cleaned trip.
        remaining_trips = self.ts.get_data_df(esda.CLEANED_TRIP_KEY, time_query=None)
        self.assertEqual(len(remaining_trips), 1)

        # Squishing sections is unsupported, and stops and sections are only
        # stored for non-squished trips; the kept trip has two sections and
        # one stop.
        remaining_sections = self.ts.get_data_df(esda.CLEANED_SECTION_KEY, time_query=None)
        self.assertEqual(len(remaining_sections), 2)
        self.assertEqual(remaining_sections.distance.tolist(), [0, 0])

        remaining_stops = self.ts.get_data_df(esda.CLEANED_STOP_KEY, time_query=None)
        self.assertEqual(len(remaining_stops), 1)
        self.assertAlmostEqual(remaining_stops.distance[0], 3252, places=0)
    def testSegmentationWrapperAndroid(self):
        # Runs trip segmentation for the Android user and validates the
        # places and trips it stored for a 24-hour window: how many there
        # are, that they point at each other, and how long the trips last.
        eaist.segment_current_trips(self.androidUUID)

        # Same window for both queries; places are matched on enter time,
        # trips on start time.
        query_window = (1440658800, 1440745200)
        stored_places = esdp.get_places(
            self.androidUUID, enua.UserCache.TimeQuery("enter_ts", *query_window))
        stored_trips = esdt.get_trips(
            self.androidUUID, enua.UserCache.TimeQuery("start_ts", *query_window))

        for n, a_place in enumerate(stored_places):
            logging.debug("Retrieved places %s: %s -> %s" % (n, a_place.enter_fmt_time, a_place.exit_fmt_time))
        for n, a_trip in enumerate(stored_trips):
            logging.debug("Retrieved trips %s: %s -> %s" % (n, a_trip.start_fmt_time, a_trip.end_fmt_time))

        # Of the nine places expected overall, the chain's starting place has
        # no enter time and is therefore not matched by the enter_ts query,
        # which leaves eight places and eight trips.
        self.assertEqual(len(stored_places), 8)
        self.assertEqual(len(stored_trips), 8)

        # Check the linkage between the first two trips and the first
        # retrieved place (the true first place is not returned, see above).
        leading_trip, following_trip = stored_trips[0], stored_trips[1]
        linking_place = stored_places[0]

        self.assertEqual(leading_trip.end_place, linking_place.get_id())
        self.assertEqual(following_trip.start_place, linking_place.get_id())
        self.assertEqual(linking_place.ending_trip, leading_trip.get_id())
        self.assertEqual(linking_place.starting_trip, following_trip.get_id())

        # Expected durations in seconds: 11m09s and 6m54s.
        self.assertEqual(round(leading_trip.duration), 11 * 60 + 9)
        self.assertEqual(round(following_trip.duration), 6 * 60 + 54)

        self.assertIsNotNone(linking_place.location)
Пример #27
0
    def testSegmentationWrapperWithAutoTrip(self):
        # Segments the test user's data, then logs the enter/exit times of
        # the raw stops and the start/end times of the raw sections of each
        # raw trip in the queried window. Nothing is asserted; the test only
        # requires that the calls complete.
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)

        window = estt.TimeQuery("data.start_ts", 1440658800, 1440745200)
        trips_in_window = esda.get_entries(esda.RAW_TRIP_KEY, self.testUUID,
                                           window)

        for current_trip in trips_in_window:
            logging.debug("current trip is %s" % current_trip)
            # Both lookups happen before any per-item logging for this trip.
            stops = esdt.get_raw_stops_for_trip(self.testUUID, current_trip.get_id())
            sections = esdt.get_raw_sections_for_trip(self.testUUID, current_trip.get_id())

            for n, a_stop in enumerate(stops):
                stop_fields = (n, a_stop.data.enter_fmt_time, a_stop.data.exit_fmt_time)
                logging.info("Retrieved stops %s: %s -> %s" % stop_fields)
            for n, a_section in enumerate(sections):
                section_fields = (n, a_section.data.start_fmt_time,
                                  a_section.data.end_fmt_time)
                logging.info("Retrieved sections %s: %s -> %s" % section_fields)
Пример #28
0
def run_intake_pipeline_for_user(uuid):
    """Run the intake pipeline stages for one user, recording each stage's time.

    Every stage is announced through both ``logging.info`` and ``print``,
    executed inside an ``ect.Timer`` block, and followed by a call to
    ``esds.store_pipeline_time`` with that timer's elapsed value.  After the
    user cache has been moved to long term, the function returns early when
    the only distinct ``metadata.key`` in the user's timeseries is
    ``stats/pipeline_time``.

    :param uuid: identifier of the user whose data is processed
    """
    stars = "*" * 10
    cache_handler = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as usercache_timer:
        banner = stars + "UUID %s: moving to long term" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        cache_handler.moveToLongTerm()

    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.USERCACHE.name,
        time.time(), usercache_timer.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # A plain "no entries" check no longer works now that stats live in the
    # timeseries: the pipeline runs for every user, even really old ones, and
    # each run inserts pipeline_time stats.  So instead, skip users whose
    # timeseries holds nothing but pipeline_time entries.
    # This switch (distinct versus count) may be why the pipeline has become
    # so much slower recently; deleting the spurious entries, or at least
    # marking them obsolete, would be worth trying.
    stored_keys = edb.get_timeseries_db().find(
        {"user_id": uuid}).distinct("metadata.key")
    if stored_keys == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as accuracy_timer:
        banner = stars + "UUID %s: filter accuracy if needed" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicf.filter_accuracy(uuid)

    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
        time.time(), accuracy_timer.elapsed)

    with ect.Timer() as trip_timer:
        banner = stars + "UUID %s: segmenting into trips" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaist.segment_current_trips(uuid)

    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
        time.time(), trip_timer.elapsed)

    with ect.Timer() as section_timer:
        banner = stars + "UUID %s: segmenting into sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaiss.segment_current_sections(uuid)

    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.SECTION_SEGMENTATION.name,
        time.time(), section_timer.elapsed)

    with ect.Timer() as smoothing_timer:
        banner = stars + "UUID %s: smoothing sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicl.filter_current_sections(uuid)

    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
        time.time(), smoothing_timer.elapsed)

    with ect.Timer() as resample_timer:
        banner = (stars + "UUID %s: cleaning and resampling timeline" % uuid
                  + stars)
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicr.clean_and_resample(uuid)

    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
        time.time(), resample_timer.elapsed)

    with ect.Timer() as autocheck_timer:
        banner = (stars
                  + "UUID %s: checking active mode trips to autocheck habits" % uuid
                  + stars)
        logging.info(banner)
        print(str(arrow.now()) + banner)
        autocheck.give_points_for_all_tasks(uuid)

    # This stage is stored under a literal name rather than a PipelineStages member
    esds.store_pipeline_time(
        uuid, "AUTOCHECK_POINTS",
        time.time(), autocheck_timer.elapsed)

    with ect.Timer() as output_timer:
        banner = stars + "UUID %s: storing views to cache" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        cache_handler.storeViewsToCache()

    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
        time.time(), output_timer.elapsed)
Пример #29
0
def runIntakePipeline(uuid):
    """Run the intake stages for ``uuid`` one after another.

    The stages are invoked in this order: accuracy filtering, trip
    segmentation, section segmentation, section filtering, and
    clean-and-resample.

    :param uuid: identifier of the user whose data is processed
    """
    stage_functions = (
        eaicf.filter_accuracy,
        eaist.segment_current_trips,
        eaiss.segment_current_sections,
        eaicl.filter_current_sections,
        eaicr.clean_and_resample,
    )
    for run_stage in stage_functions:
        run_stage(uuid)
Пример #30
0
    ]
    half = old_div(len(filtered_long_term_uuid_list), 2)
    long_term_uuid_list = filtered_long_term_uuid_list[half:]

    logging.info("*" * 10 + "long term UUID list = %s" % long_term_uuid_list)
    for uuid in long_term_uuid_list:
        if uuid is None:
            continue

        logging.info("*" * 10 + "UUID %s: filter accuracy if needed" % uuid +
                     "*" * 10)
        eaicf.filter_accuracy(uuid)

        logging.info("*" * 10 + "UUID %s: segmenting into trips" % uuid +
                     "*" * 10)
        eaist.segment_current_trips(uuid)

        logging.info("*" * 10 + "UUID %s: segmenting into sections" % uuid +
                     "*" * 10)
        eaiss.segment_current_sections(uuid)

        logging.info("*" * 10 + "UUID %s: smoothing sections" % uuid +
                     "*" * 10)
        eaicl.filter_current_sections(uuid)

        logging.info("*" * 10 +
                     "UUID %s: cleaning and resampling timeline" % uuid +
                     "*" * 10)
        eaicr.clean_and_resample(uuid)

        logging.info(
    def testSegmentationWrapperCombined(self):
        """Segment merged android + iOS data and verify trip/place linkage.

        All iOS entries are re-saved under the android UUID, trips are
        segmented for that user, and the resulting raw places and trips are
        checked for count, mutual linkage, rounded duration and a non-None
        place location.
        """
        # Re-home every iOS entry under the android UUID
        timeseries_db = edb.get_timeseries_db()
        ios_series = esta.TimeSeries.get_time_series(self.iosUUID)
        for ios_entry in ios_series.find_entries():
            ios_entry["user_id"] = self.androidUUID
            timeseries_db.save(ios_entry)

        # Segment the combined UUID, which now holds both android and iOS data
        eaist.segment_current_trips(self.androidUUID)

        place_window = estt.TimeQuery("data.enter_ts", 1440658800, 1446847600)
        places = esda.get_entries(esda.RAW_PLACE_KEY, self.androidUUID,
                                  place_window)

        trip_window = estt.TimeQuery("data.start_ts", 1440658800, 1446847600)
        trips = esda.get_entries(esda.RAW_TRIP_KEY, self.androidUUID,
                                 trip_window)

        for idx, place_entry in enumerate(places):
            place_fields = (idx, place_entry.data.enter_fmt_time,
                            place_entry.data.exit_fmt_time)
            logging.debug("Retrieved places %s: %s -> %s" % place_fields)
        for idx, trip_entry in enumerate(trips):
            trip_fields = (idx, trip_entry.data.start_fmt_time,
                           trip_entry.data.end_fmt_time)
            logging.debug("Retrieved trips %s: %s -> %s" % trip_fields)

        # 12 places are expected overall, but the first one starts the chain:
        # its start time is None, so the time query above does not return it.
        self.assertEqual(len(places), 11)
        self.assertEqual(len(trips), 11)

        # Check that the first two trips and the first retrieved place are
        # linked to each other.  This is the first *retrieved* place because
        # the true first place is not returned by the query (see above).
        # NOTE(review): an earlier comment said the first trip is a dummy and
        # that the second and third trips should be checked instead, but
        # indices 0 and 1 are what the code uses - confirm which is intended.
        leading_trip, following_trip = trips[0], trips[1]
        shared_place = places[0]

        self.assertEqual(leading_trip.data.end_place, shared_place.get_id())
        self.assertEqual(following_trip.data.start_place,
                         shared_place.get_id())
        self.assertEqual(shared_place.data.ending_trip, leading_trip.get_id())
        self.assertEqual(shared_place.data.starting_trip,
                         following_trip.get_id())

        self.assertEqual(round(leading_trip.data.duration), 11 * 60 + 9)
        self.assertEqual(round(following_trip.data.duration), 6 * 60 + 54)

        self.assertIsNotNone(shared_place.data.location)

        # The 8 android trips come first (indices 0-7) and index 8 is the
        # short, bogus trip, so the second linkage check uses trips 9 and 10.
        late_trip, last_trip = trips[9], trips[10]
        late_place = places[9]

        self.assertEqual(late_trip.data.end_place, late_place.get_id())
        self.assertEqual(last_trip.data.start_place, late_place.get_id())
        self.assertEqual(late_place.data.ending_trip, late_trip.get_id())
        self.assertEqual(late_place.data.starting_trip, last_trip.get_id())

        self.assertEqual(round(late_trip.data.duration), 58 * 60 + 51)
        self.assertEqual(round(last_trip.data.duration), 39 * 60 + 49)

        self.assertIsNotNone(late_place.data.location)
Пример #32
0
def runIntakePipeline(uuid):
    """Apply each intake stage to ``uuid`` in sequence.

    Order of execution: accuracy filtering, trip segmentation, section
    segmentation, section filtering, then clean-and-resample.

    :param uuid: identifier of the user whose data is processed
    """
    ordered_stages = [
        eaicf.filter_accuracy,
        eaist.segment_current_trips,
        eaiss.segment_current_sections,
        eaicl.filter_current_sections,
        eaicr.clean_and_resample,
    ]
    for stage in ordered_stages:
        stage(uuid)
Пример #33
0
def run_intake_pipeline_for_user(uuid):
    """Run the intake pipeline stages for one user, announcing each stage.

    Each stage is announced through both ``logging.info`` and ``print``
    before it runs.  After the user cache has been moved to long term, the
    function returns early when the user's timeseries contains no entries.

    :param uuid: identifier of the user whose data is processed
    """
    stars = "*" * 10
    cache_handler = euah.UserCacheHandler.getUserCacheHandler(uuid)

    banner = stars + "UUID %s: moving to long term" % uuid + stars
    logging.info(banner)
    print(str(arrow.now()) + banner)

    cache_handler.moveToLongTerm()

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    entry_count = edb.get_timeseries_db().find({"user_id": uuid}).count()
    if entry_count == 0:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    banner = stars + "UUID %s: filter accuracy if needed" % uuid + stars
    logging.info(banner)
    print(str(arrow.now()) + banner)
    eaicf.filter_accuracy(uuid)

    banner = stars + "UUID %s: segmenting into trips" % uuid + stars
    logging.info(banner)
    print(str(arrow.now()) + banner)
    eaist.segment_current_trips(uuid)

    banner = stars + "UUID %s: segmenting into sections" % uuid + stars
    logging.info(banner)
    print(str(arrow.now()) + banner)
    eaiss.segment_current_sections(uuid)

    banner = stars + "UUID %s: smoothing sections" % uuid + stars
    logging.info(banner)
    print(str(arrow.now()) + banner)
    eaicl.filter_current_sections(uuid)

    banner = (stars + "UUID %s: cleaning and resampling timeline" % uuid
              + stars)
    logging.info(banner)
    print(str(arrow.now()) + banner)
    eaicr.clean_and_resample(uuid)

    banner = (stars
              + "UUID %s: checking active mode trips to autocheck habits" % uuid
              + stars)
    logging.info(banner)
    print(str(arrow.now()) + banner)
    autocheck.give_points_for_all_tasks(uuid)

    banner = stars + "UUID %s: storing views to cache" % uuid + stars
    logging.info(banner)
    print(str(arrow.now()) + banner)
    cache_handler.storeViewsToCache()
    def testSegmentationWrapperCombined(self):
        """Segment merged android + iOS data and verify trip/place linkage.

        All iOS entries are re-saved under the android UUID, trips are
        segmented for that user, and the resulting places and trips are
        checked for count, mutual linkage, rounded duration and a non-None
        place location.
        """
        # Re-home every iOS entry under the android UUID
        ios_series = esta.TimeSeries.get_time_series(self.iosUUID)
        for ios_entry in ios_series.find_entries():
            ios_entry["user_id"] = self.androidUUID
            edb.get_timeseries_db().save(ios_entry)

        # Segment the combined UUID, which now holds both android and iOS data
        eaist.segment_current_trips(self.androidUUID)

        place_window = enua.UserCache.TimeQuery("enter_ts", 1440658800,
                                                1446847600)
        places = esdp.get_places(self.androidUUID, place_window)

        trip_window = enua.UserCache.TimeQuery("start_ts", 1440658800,
                                               1446847600)
        trips = esdt.get_trips(self.androidUUID, trip_window)

        for idx, place in enumerate(places):
            place_fields = (idx, place.enter_fmt_time, place.exit_fmt_time)
            logging.debug("Retrieved places %s: %s -> %s" % place_fields)
        for idx, trip in enumerate(trips):
            trip_fields = (idx, trip.start_fmt_time, trip.end_fmt_time)
            logging.debug("Retrieved trips %s: %s -> %s" % trip_fields)

        # 12 places are expected overall, but the first one starts the chain:
        # its start time is None, so the time query above does not return it.
        self.assertEqual(len(places), 11)
        self.assertEqual(len(trips), 11)

        # Check that the first two trips and the first retrieved place are
        # linked to each other.  This is the first *retrieved* place because
        # the true first place is not returned by the query (see above).
        # NOTE(review): an earlier comment said the first trip is a dummy and
        # that the second and third trips should be checked instead, but
        # indices 0 and 1 are what the code uses - confirm which is intended.
        leading_trip, following_trip = trips[0], trips[1]
        shared_place = places[0]

        self.assertEqual(leading_trip.end_place, shared_place.get_id())
        self.assertEqual(following_trip.start_place, shared_place.get_id())
        self.assertEqual(shared_place.ending_trip, leading_trip.get_id())
        self.assertEqual(shared_place.starting_trip, following_trip.get_id())

        self.assertEqual(round(leading_trip.duration), 11 * 60 + 9)
        self.assertEqual(round(following_trip.duration), 6 * 60 + 54)

        self.assertIsNotNone(shared_place.location)

        # The 8 android trips come first (indices 0-7) and index 8 is the
        # short, bogus trip, so the second linkage check uses trips 9 and 10.
        late_trip, last_trip = trips[9], trips[10]
        late_place = places[9]

        self.assertEqual(late_trip.end_place, late_place.get_id())
        self.assertEqual(last_trip.start_place, late_place.get_id())
        self.assertEqual(late_place.ending_trip, late_trip.get_id())
        self.assertEqual(late_place.starting_trip, last_trip.get_id())

        self.assertEqual(round(late_trip.duration), 58 * 60 + 51)
        self.assertEqual(round(last_trip.duration), 38 * 60 + 57)

        self.assertIsNotNone(late_place.location)
 def testEmptyCall(self):
     """Check that trip segmentation for a freshly generated user id does not raise."""
     import uuid

     # The random id is not expected to match any stored data; the test
     # passes as long as the call completes without an exception
     eaist.segment_current_trips(uuid.uuid4())
def run_pipeline():
    """Run the intake pipeline for the user named in the current request.

    Reads ``pm_address`` and ``uuid`` from ``request.json``, stores the
    address on ``edb.pm_address``, and then runs each pipeline stage in turn.
    Every stage is announced through ``logging.info`` and ``print``, executed
    inside an ``ect.Timer`` block, and followed by a call to
    ``esds.store_pipeline_time`` with that timer's elapsed value.

    Returns early, after the usercache stage, when the only distinct
    ``metadata.key`` stored for the user is ``stats/pipeline_time``.
    """
    stars = "*" * 10

    edb.pm_address = request.json['pm_address']
    print(edb.pm_address)
    # uuid is a filler and just needs to be consistent for each user.
    # These can be removed but require refactoring all code locations
    # that use the uuid.
    uuid = request.json['uuid']
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as uct:
        # This first banner intentionally carries no UUID, unlike the others
        banner = stars + "moving to long term" + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        uh.moveToLongTerm()

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USERCACHE.name,
                             time.time(), uct.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # A plain "no entries" check no longer works now that stats live in the
    # timeseries: the pipeline runs for every user, even really old ones, and
    # each run inserts pipeline_time stats.  So instead, skip users whose
    # timeseries holds nothing but pipeline_time entries.
    # This switch (distinct versus count) may be why the pipeline has become
    # so much slower recently; deleting the spurious entries, or at least
    # marking them obsolete, would be worth trying.
    #
    # The distinct-key query is issued once and its result is both printed
    # and compared, instead of running the same query twice in a row.
    stored_keys = edb.get_timeseries_db().find(
        {"user_id": uuid}).distinct("metadata.key")
    print(stored_keys)

    if stored_keys == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as aft:
        banner = stars + "UUID %s: filter accuracy if needed" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicf.filter_accuracy(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                             time.time(), aft.elapsed)

    with ect.Timer() as tst:
        banner = stars + "UUID %s: segmenting into trips" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaist.segment_current_trips(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                             time.time(), tst.elapsed)

    with ect.Timer() as sst:
        banner = stars + "UUID %s: segmenting into sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaiss.segment_current_sections(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                             time.time(), sst.elapsed)

    with ect.Timer() as jst:
        banner = stars + "UUID %s: smoothing sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicl.filter_current_sections(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                             time.time(), jst.elapsed)

    with ect.Timer() as crt:
        banner = (stars + "UUID %s: cleaning and resampling timeline" % uuid
                  + stars)
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicr.clean_and_resample(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                             time.time(), crt.elapsed)

    # Mode inference uses its own timer name so it cannot be confused with
    # the clean-and-resample timer above
    with ect.Timer() as mit:
        banner = (stars + "UUID %s: inferring transportation mode" % uuid
                  + stars)
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eacimp.predict_mode(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.MODE_INFERENCE.name,
                             time.time(), mit.elapsed)

    with ect.Timer() as ogt:
        banner = stars + "UUID %s: storing views to cache" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        # use store data
        uh.storeViewsToCache()

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                             time.time(), ogt.elapsed)
def run_intake_pipeline_for_user(uuid):
    """Run the intake pipeline stages for one user, recording each stage's time.

    Every stage is announced through both ``logging.info`` and ``print``,
    executed inside an ``ect.Timer`` block, and followed by a call to
    ``esds.store_pipeline_time`` with that timer's elapsed value.  After the
    user cache has been moved to long term, the function returns early when
    the user's timeseries contains no entries.

    :param uuid: identifier of the user whose data is processed
    """
    stars = "*" * 10
    cache_handler = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as usercache_timer:
        banner = stars + "UUID %s: moving to long term" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        cache_handler.moveToLongTerm()

    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.USERCACHE.name,
        time.time(), usercache_timer.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    entry_count = edb.get_timeseries_db().find({"user_id": uuid}).count()
    if entry_count == 0:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as accuracy_timer:
        banner = stars + "UUID %s: filter accuracy if needed" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicf.filter_accuracy(uuid)

    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
        time.time(), accuracy_timer.elapsed)

    with ect.Timer() as trip_timer:
        banner = stars + "UUID %s: segmenting into trips" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaist.segment_current_trips(uuid)

    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
        time.time(), trip_timer.elapsed)

    with ect.Timer() as section_timer:
        banner = stars + "UUID %s: segmenting into sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaiss.segment_current_sections(uuid)

    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.SECTION_SEGMENTATION.name,
        time.time(), section_timer.elapsed)

    with ect.Timer() as smoothing_timer:
        banner = stars + "UUID %s: smoothing sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicl.filter_current_sections(uuid)

    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
        time.time(), smoothing_timer.elapsed)

    with ect.Timer() as resample_timer:
        banner = (stars + "UUID %s: cleaning and resampling timeline" % uuid
                  + stars)
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicr.clean_and_resample(uuid)

    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
        time.time(), resample_timer.elapsed)

    with ect.Timer() as autocheck_timer:
        banner = (stars
                  + "UUID %s: checking active mode trips to autocheck habits" % uuid
                  + stars)
        logging.info(banner)
        print(str(arrow.now()) + banner)
        autocheck.give_points_for_all_tasks(uuid)

    # This stage is stored under a literal name rather than a PipelineStages member
    esds.store_pipeline_time(
        uuid, "AUTOCHECK_POINTS",
        time.time(), autocheck_timer.elapsed)

    with ect.Timer() as output_timer:
        banner = stars + "UUID %s: storing views to cache" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        cache_handler.storeViewsToCache()

    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
        time.time(), output_timer.elapsed)
Пример #38
0
 def testPlaceTripTimeline(self):
     """Segment trips, then check the timestamp-bounded raw timeline for consistency."""
     eaist.segment_current_trips(self.testUUID)
     raw_timeline = esdt.get_raw_timeline(
         self.testUUID,
         self.day_start_ts,
         self.day_end_ts,
     )
     self.checkPlaceTripConsistency(raw_timeline)
Пример #39
0
 def testDatetimeTimeline(self):
     """Segment trips, then check the datetime-bounded raw timeline for consistency."""
     eaist.segment_current_trips(self.testUUID)
     raw_timeline = esdt.get_raw_timeline_from_dt(
         self.testUUID,
         self.day_start_dt,
         self.day_end_dt,
     )
     self.checkPlaceTripConsistency(raw_timeline)
 def testDatetimeTimeline(self):
     """Verify place/trip consistency of the raw timeline fetched by datetime bounds."""
     eaist.segment_current_trips(self.testUUID)
     # Bounds are the fixture's day start / day end datetime attributes
     dt_timeline = esdt.get_raw_timeline_from_dt(
         self.testUUID, self.day_start_dt, self.day_end_dt)
     self.checkPlaceTripConsistency(dt_timeline)
 def testPlaceTripTimeline(self):
     """Verify place/trip consistency of the raw timeline fetched by timestamp bounds."""
     eaist.segment_current_trips(self.testUUID)
     # Bounds are the fixture's day start / day end timestamp attributes
     ts_timeline = esdt.get_raw_timeline(
         self.testUUID, self.day_start_ts, self.day_end_ts)
     self.checkPlaceTripConsistency(ts_timeline)
 def testEmptyCall(self):
     """Segmenting trips for a freshly generated user id must not raise."""
     import uuid

     # No assertion is needed: completing the call without an exception is
     # the whole expectation
     fresh_user_id = uuid.uuid4()
     eaist.segment_current_trips(fresh_user_id)
    def testSegmentationWrapperCombined(self):
        """Segment merged android + iOS data and verify trip/place linkage.

        All iOS entries are re-saved under the android UUID, trips are
        segmented for that user, and the resulting raw places and trips
        (fetched together with the untracked-time key) are checked for count,
        mutual linkage, rounded duration and a non-None place location.
        """
        # Re-home every iOS entry under the android UUID
        timeseries_db = edb.get_timeseries_db()
        ios_series = esta.TimeSeries.get_time_series(self.iosUUID)
        for ios_entry in ios_series.find_entries():
            ios_entry["user_id"] = self.androidUUID
            edb.save(timeseries_db, ios_entry)

        # Segment the combined UUID, which now holds both android and iOS data
        eaist.segment_current_trips(self.androidUUID)

        place_window = estt.TimeQuery("data.enter_ts", 1440658800, 1446847600)
        places = esda.get_entries(esda.RAW_PLACE_KEY, self.androidUUID,
                                  place_window)

        trip_window = estt.TimeQuery("data.start_ts", 1440658800, 1446847600)
        trips = esda.get_entries(esda.RAW_TRIP_KEY, self.androidUUID,
                                 trip_window,
                                 untracked_key=esda.RAW_UNTRACKED_KEY)

        for idx, place_entry in enumerate(places):
            place_fields = (idx, place_entry.data.enter_fmt_time,
                            place_entry.data.exit_fmt_time)
            logging.debug("Retrieved places %s: %s -> %s" % place_fields)
        for idx, trip_entry in enumerate(trips):
            trip_fields = (idx, trip_entry.data.start_fmt_time,
                           trip_entry.data.end_fmt_time)
            logging.debug("Retrieved trips %s: %s -> %s" % trip_fields)

        # 12 places are expected overall, but the first one starts the chain:
        # its start time is None, so the time query above does not return it.
        self.assertEqual(len(places), 11)
        self.assertEqual(len(trips), 11)

        # Check that the first two trips and the first retrieved place are
        # linked to each other.  This is the first *retrieved* place because
        # the true first place is not returned by the query (see above).
        # NOTE(review): an earlier comment said the first trip is a dummy and
        # that the second and third trips should be checked instead, but
        # indices 0 and 1 are what the code uses - confirm which is intended.
        leading_trip, following_trip = trips[0], trips[1]
        shared_place = places[0]

        self.assertEqual(leading_trip.data.end_place, shared_place.get_id())
        self.assertEqual(following_trip.data.start_place,
                         shared_place.get_id())
        self.assertEqual(shared_place.data.ending_trip, leading_trip.get_id())
        self.assertEqual(shared_place.data.starting_trip,
                         following_trip.get_id())

        self.assertEqual(round(leading_trip.data.duration), 11 * 60 + 9)
        self.assertEqual(round(following_trip.data.duration), 6 * 60 + 54)

        self.assertIsNotNone(shared_place.data.location)

        # The earlier comment here read: 9 android "trips" come first
        # (indices 0-8, including the untracked time), index 9 is the short,
        # bogus trip, so trips 10 and 11 should be checked.
        # NOTE(review): the code actually checks indices 9 and 10, and with
        # 11 entries index 11 does not exist - confirm the intended indices.
        late_trip, last_trip = trips[9], trips[10]
        late_place = places[9]

        self.assertEqual(late_trip.data.end_place, late_place.get_id())
        self.assertEqual(last_trip.data.start_place, late_place.get_id())
        self.assertEqual(late_place.data.ending_trip, late_trip.get_id())
        self.assertEqual(late_place.data.starting_trip, last_trip.get_id())

        self.assertEqual(round(late_trip.data.duration), 14 * 60 + 41)
        self.assertEqual(round(last_trip.data.duration),
                         1 * 60 * 60 + 50 * 60 + 56)

        self.assertIsNotNone(late_place.data.location)
Пример #44
0
def run_intake_pipeline_for_user(uuid):
    """Run the intake pipeline stages for one user, recording each stage's time.

    Every stage is announced through both ``logging.info`` and ``print``,
    executed inside an ``ect.Timer`` block, and followed by a call to
    ``esds.store_pipeline_time`` with that stage's own elapsed value.

    After the usercache and incoming-user-input stages, the function returns
    early when the only distinct ``metadata.key`` in the user's timeseries is
    ``stats/pipeline_time``.

    :param uuid: identifier of the user whose data is processed
    """
    stars = "*" * 10
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as uct:
        banner = stars + "UUID %s: moving to long term" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        uh.moveToLongTerm()

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USERCACHE.name,
                             time.time(), uct.elapsed)

    with ect.Timer() as uit:
        banner = (stars + "UUID %s: updating incoming user inputs" % uuid
                  + stars)
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaum.match_incoming_user_inputs(uuid)

    # Store the time measured by this stage's timer (uit), not the usercache
    # timer (uct), so the recorded duration belongs to the right stage
    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.USER_INPUT_MATCH_INCOMING.name, time.time(),
        uit.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # A plain "no entries" check no longer works now that stats live in the
    # timeseries: the pipeline runs for every user, even really old ones, and
    # each run inserts pipeline_time stats.  So instead, skip users whose
    # timeseries holds nothing but pipeline_time entries.
    # This switch (distinct versus count) may be why the pipeline has become
    # so much slower recently; deleting the spurious entries, or at least
    # marking them obsolete, would be worth trying.
    stored_keys = edb.get_timeseries_db().find({
        "user_id": uuid
    }).distinct("metadata.key")
    if stored_keys == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as aft:
        banner = stars + "UUID %s: filter accuracy if needed" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicf.filter_accuracy(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                             time.time(), aft.elapsed)

    with ect.Timer() as tst:
        banner = stars + "UUID %s: segmenting into trips" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaist.segment_current_trips(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                             time.time(), tst.elapsed)

    with ect.Timer() as sst:
        banner = stars + "UUID %s: segmenting into sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaiss.segment_current_sections(uuid)

    esds.store_pipeline_time(uuid,
                             ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                             time.time(), sst.elapsed)

    with ect.Timer() as jst:
        banner = stars + "UUID %s: smoothing sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicl.filter_current_sections(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                             time.time(), jst.elapsed)

    with ect.Timer() as crt:
        banner = (stars + "UUID %s: cleaning and resampling timeline" % uuid
                  + stars)
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicr.clean_and_resample(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                             time.time(), crt.elapsed)

    # The next two stages each get a distinct timer name so that no stage can
    # accidentally report another stage's elapsed time
    with ect.Timer() as mit:
        banner = (stars + "UUID %s: inferring transportation mode" % uuid
                  + stars)
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eacimr.predict_mode(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.MODE_INFERENCE.name,
                             time.time(), mit.elapsed)

    with ect.Timer() as cot:
        banner = stars + "UUID %s: creating confirmed objects " % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaum.create_confirmed_objects(uuid)

    esds.store_pipeline_time(uuid,
                             ecwp.PipelineStages.CREATE_CONFIRMED_OBJECTS.name,
                             time.time(), cot.elapsed)

    with ect.Timer() as ogt:
        banner = stars + "UUID %s: storing views to cache" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        uh.storeViewsToCache()

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                             time.time(), ogt.elapsed)
    for uuid in cache_uuid_list:
        logging.info("*" * 10 + "UUID %s: moving to long term" % uuid + "*" * 10)
        uh = euah.UserCacheHandler.getUserCacheHandler(uuid)
        uh.moveToLongTerm()

    # TODO: For now, move filters from metadata to data. Once we get the
    # updated data collection clients to people, we don't need to do this any
    # more
    import emission.storage.timeseries.format_hacks.move_filter_field as estfm
    estfm.move_all_filters_to_data()

    long_term_uuid_list = esta.TimeSeries.get_uuid_list()
    logging.info("*" * 10 + "long term UUID list = %s" % long_term_uuid_list)
    for uuid in long_term_uuid_list:
        logging.info("*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10)
        eaicf.filter_accuracy(uuid)
        
        logging.info("*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10)
        eaist.segment_current_trips(uuid)

        logging.info("*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10)
        eaiss.segment_current_sections(uuid)

        logging.info("*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10)
        eaicl.filter_current_sections(uuid)

        logging.info("*" * 10 + "UUID %s: storing views to cache" % uuid + "*" * 10)
        uh = euah.UserCacheHandler.getUserCacheHandler(uuid)
        uh.storeViewsToCache()
Пример #46
0
def runIntakePipeline(uuid):
    """Run the intake stages for ``uuid`` one after another.

    Order of execution: accuracy filtering, the global
    ``move_all_filters_to_data`` step (which takes no user argument), trip
    segmentation, section segmentation, then clean-and-resample.

    :param uuid: identifier of the user whose data is processed
    """
    eaicf.filter_accuracy(uuid)
    # This step is not per-user: it is called without the uuid
    estfm.move_all_filters_to_data()

    remaining_stages = (
        eaist.segment_current_trips,
        eaiss.segment_current_sections,
        eaicr.clean_and_resample,
    )
    for run_stage in remaining_stages:
        run_stage(uuid)