Пример #1
0
    def setUp(self):
        """Load the 2015-08-27 real-world example, run the intake pipeline,
        and precompute the day's timestamp / local-date boundaries."""
        etc.setupRealExample(
            self, "emission/tests/data/real_examples/shankari_2015-aug-27")
        # eaicf.filter_accuracy(self.testUUID)
        etc.runIntakePipeline(self.testUUID)
        # estfm.move_all_filters_to_data()
        logging.info("After loading, timeseries db size = %s" %
                     edb.get_timeseries_db().count())
        self.day_start_ts = 1440658800
        self.day_end_ts = 1440745200
        self.day_start_dt = esdldq.get_local_date(self.day_start_ts,
                                                  "America/Los_Angeles")
        self.day_end_dt = esdldq.get_local_date(self.day_end_ts,
                                                "America/Los_Angeles")

        # Strip the time-of-day components; leaving them in would make the
        # upper and lower bounds 0, which basically matches nothing.
        for component in ("hour", "minute", "second"):
            del self.day_start_dt[component]
            del self.day_end_dt[component]
    def testAug10(self):
        """More complex day. Tests:
        - PR #302 (trip to optometrist)
        - PR #352 (split optometrist trip)
        """
        dataFile = "emission/tests/data/real_examples/shankari_2016-08-10"
        ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 10})
        cacheKey = "diary/trips-2016-08-10"
        # Fix: use a context manager so the ground-truth file handle is
        # closed deterministically instead of leaking until GC (matches the
        # style of the other tests in this file).
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        # runIntakePipeline does not run the common trips, habitica or
        # store-views-to-cache steps, so only the API result is compared.
        api_result = gfc.get_geojson_for_dt(self.testUUID, ld, ld)

        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth).data)
    def testZeroDurationPlaceInterpolationSingleSync(self):
        """Test for 545114feb5ac15caac4110d39935612525954b71.
        Loads Jan 12 data plus Jan 13 entries, runs intake once, and checks
        both days against their ground truth."""
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-01-12"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-01-13"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 1, 'day': 12})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 1, 'day': 13})
        cacheKey_1 = "diary/trips-2016-01-12"
        cacheKey_2 = "diary/trips-2016-01-13"
        # Fix: context managers close the JSON files instead of leaking the
        # handles returned by bare open() calls.
        with open(dataFile_1 + ".ground_truth") as gfp:
            ground_truth_1 = json.load(gfp, object_hook=bju.object_hook)
        with open(dataFile_2 + ".ground_truth") as gfp:
            ground_truth_2 = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile_1)
        with open(dataFile_2) as ef:
            self.entries = json.load(ef, object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1,
                                            start_ld_1)
        # Although we process the day's data in two batches, we should get
        # the same result
        self.compare_result(
            ad.AttrDict({'result': api_result}).result,
            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        self.compare_result(
            ad.AttrDict({'result': api_result}).result,
            ad.AttrDict(ground_truth_2).data)
    def testJun20(self):
        """Fairly straightforward day. Mainly checks:
        - ordering of trips
        - handling of repeated location entries with different write
          timestamps (two identical points with ts = 1466436483.395 and
          write_ts = 1466436496.4 / 1466436497.047)
        """
        dataFile = "emission/tests/data/real_examples/shankari_2016-06-20"
        ld = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})
        cacheKey = "diary/trips-2016-06-20"
        with open(dataFile + ".ground_truth") as gt_fp:
            ground_truth = json.load(gt_fp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        # runIntakePipeline does not run common trips, habitica, or the
        # store-views-to-cache step, so only the API result is compared.
        api_result = gfc.get_geojson_for_dt(self.testUUID, ld, ld)

        actual = ad.AttrDict({'result': api_result}).result
        expected = ad.AttrDict(ground_truth).data
        self.compare_result(actual, expected)
    def testAug11(self):
        """More complex day. Covers:
        - PR #352 (should not split trip to Oakland)
        - PR #348 (trip from station to OAK DOT)
        - PR #357 (trip to Radio Shack is complete and not truncated)
        - PR #345 (no cleaned trips are skipped)
        """
        dataFile = "emission/tests/data/real_examples/shankari_2016-08-11"
        ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 11})
        cacheKey = "diary/trips-2016-08-11"
        with open(dataFile + ".ground_truth") as gt_fp:
            ground_truth = json.load(gt_fp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        # runIntakePipeline does not run common trips, habitica, or the
        # store-views-to-cache step, so only the API result is compared.
        api_result = gfc.get_geojson_for_dt(self.testUUID, ld, ld)

        actual = ad.AttrDict({'result': api_result}).result
        expected = ad.AttrDict(ground_truth).data
        self.compare_result(actual, expected)
    def testJul22SplitAroundReboot(self):
        """Load Jul 22, run intake, then add the Jul 25 entries and run
        intake again; both days must match their ground truth even though
        the data arrives in two batches."""
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        # Fix: context managers close the JSON files instead of leaking the
        # handles returned by bare open() calls.
        with open(dataFile_1 + ".ground_truth") as gfp:
            ground_truth_1 = json.load(gfp, object_hook=bju.object_hook)
        with open(dataFile_2 + ".ground_truth") as gfp:
            ground_truth_2 = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        with open(dataFile_2) as ef:
            self.entries = json.load(ef, object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1,
                                            start_ld_1)
        # Although we process the day's data in two batches, we should get
        # the same result
        self.compare_result(
            ad.AttrDict({'result': api_result}).result,
            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        self.compare_result(
            ad.AttrDict({'result': api_result}).result,
            ad.AttrDict(ground_truth_2).data)
    def testJun21(self):
        """More complex day. Covers:
        - PR #357 (spurious trip at 14:00 should be segmented and skipped)
        - PR #358 (trip back from bella's house at 16:00)
        """
        dataFile = "emission/tests/data/real_examples/shankari_2016-06-21"
        ld = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 21})
        cacheKey = "diary/trips-2016-06-21"
        with open(dataFile + ".ground_truth") as gt_fp:
            ground_truth = json.load(gt_fp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        # runIntakePipeline does not run common trips, habitica, or the
        # store-views-to-cache step, so only the API result is compared.
        api_result = gfc.get_geojson_for_dt(self.testUUID, ld, ld)

        actual = ad.AttrDict({'result': api_result}).result
        expected = ad.AttrDict(ground_truth).data
        self.compare_result(actual, expected)
    def testAug10(self):
        """More complex day. Tests:
        - PR #302 (trip to optometrist)
        - PR #352 (split optometrist trip)
        """
        dataFile = "emission/tests/data/real_examples/shankari_2016-08-10"
        ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 10})
        cacheKey = "diary/trips-2016-08-10"
        with open(dataFile + ".ground_truth") as gt_fp:
            ground_truth = json.load(gt_fp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        # runIntakePipeline does not run common trips, habitica, or the
        # store-views-to-cache step, so only the API result is compared.
        api_result = gfc.get_geojson_for_dt(self.testUUID, ld, ld)

        actual = ad.AttrDict({'result': api_result}).result
        expected = ad.AttrDict(ground_truth).data
        self.compare_result(actual, expected)
    def testAug11(self):
        """More complex day. Covers:
        - PR #352 (should not split trip to Oakland)
        - PR #348 (trip from station to OAK DOT)
        - PR #357 (trip to Radio Shack is complete and not truncated)
        - PR #345 (no cleaned trips are skipped)
        """
        dataFile = "emission/tests/data/real_examples/shankari_2016-08-11"
        ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 11})
        cacheKey = "diary/trips-2016-08-11"
        # Fix: close the ground-truth file deterministically instead of
        # leaking the handle from a bare open() call.
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        # runIntakePipeline does not run the common trips, habitica or
        # store-views-to-cache steps, so only the API result is compared.
        api_result = gfc.get_geojson_for_dt(self.testUUID, ld, ld)

        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
    def testJun20(self):
        """Straightforward day. Mainly checks trip ordering and handling of
        repeated location entries with different write timestamps (two
        identical points with ts = 1466436483.395 and write_ts =
        1466436496.4 / 1466436497.047)."""
        dataFile = "emission/tests/data/real_examples/shankari_2016-06-20"
        ld = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})
        cacheKey = "diary/trips-2016-06-20"
        with open(dataFile + ".ground_truth") as gt_fp:
            ground_truth = json.load(gt_fp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        # runIntakePipeline does not run common trips, habitica, or the
        # store-views-to-cache step, so only the API result is compared.
        api_result = gfc.get_geojson_for_dt(self.testUUID, ld, ld)

        actual = ad.AttrDict({'result': api_result}).result
        expected = ad.AttrDict(ground_truth).data
        self.compare_result(actual, expected)
    def testZeroDurationPlaceInterpolationMultiSync(self):
        """Test for 545114feb5ac15caac4110d39935612525954b71.
        Loads Jan 12 data and runs intake, then adds the Jan 13 entries and
        runs intake again; both days must match their ground truth."""
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-01-12"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-01-13"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 1, 'day': 12})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 1, 'day': 13})
        cacheKey_1 = "diary/trips-2016-01-12"
        cacheKey_2 = "diary/trips-2016-01-13"
        # Fix: context managers close the JSON files instead of leaking the
        # handles returned by bare open() calls.
        with open(dataFile_1 + ".ground_truth") as gfp:
            ground_truth_1 = json.load(gfp, object_hook=bju.object_hook)
        with open(dataFile_2 + ".ground_truth") as gfp:
            ground_truth_2 = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        with open(dataFile_2) as ef:
            self.entries = json.load(ef, object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        # Although we process the day's data in two batches, we should get
        # the same result
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)
    def testJul22SplitAroundReboot(self):
        """Load Jul 22, run intake, then add the Jul 25 entries and run
        intake again; both days must match their ground truth even though
        the data arrives in two batches."""
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        # Fix: context managers close the JSON files instead of leaking the
        # handles returned by bare open() calls.
        with open(dataFile_1 + ".ground_truth") as gfp:
            ground_truth_1 = json.load(gfp, object_hook=bju.object_hook)
        with open(dataFile_2 + ".ground_truth") as gfp:
            ground_truth_2 = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        with open(dataFile_2) as ef:
            self.entries = json.load(ef, object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        # Although we process the day's data in two batches, we should get
        # the same result
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)
Пример #13
0
    def testIsMatchedUser(self):
        """Load one Bay Area day and one Hawaii day as two separate users,
        then verify that a query requiring an air trip matches only the
        Hawaii user."""
        # Load data for the Bay Area
        dataFileba = "emission/tests/data/real_examples/shankari_2016-06-20"
        ldba = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})
        etc.setupRealExample(self, dataFileba)
        testUUIDba = self.testUUID
        etc.runIntakePipeline(testUUIDba)
        logging.debug("uuid for the bay area = %s " % testUUIDba)

        # Load data for Hawaii
        dataFilehi = "emission/tests/data/real_examples/shankari_2016-07-27"
        ldhi = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 27})
        etc.setupRealExample(self, dataFilehi)
        testUUIDhi = self.testUUID
        etc.runIntakePipeline(testUUIDhi)
        logging.debug("uuid for hawaii = %s " % testUUIDhi)

        self.testUUIDList = [testUUIDba, testUUIDhi]

        # Build the query spec from named pieces for readability.
        walk_check = {
            "modes": ['WALKING', 'ON_FOOT'],
            "metric": "count",
            "threshold": {"$gt": 5}
        }
        air_check = {
            "modes": ['AIR_OR_HSR'],
            "metric": "count",
            "threshold": {"$gt": 1}
        }
        air_query_spec = {
            "time_type": "local_date",
            "from_local_date": {"year": 2016, "month": 2},
            "to_local_date": {"year": 2016, "month": 9},
            "freq": 'DAILY',
            "checks": [walk_check, air_check]
        }

        # Since this requires at least one air trip, this will only return
        # the hawaii trip
        self.assertTrue(
            tripmetrics.is_matched_user(testUUIDhi, air_query_spec))
        self.assertFalse(
            tripmetrics.is_matched_user(testUUIDba, air_query_spec))
Пример #14
0
    def testAug10MultiSyncEndNotDetected(self):
        """Feed the Aug 10 data in two sync batches split at 10:30, with the
        last two pre-10:30 points withheld so the trip end is not present in
        the first batch; the final API result must still approximately match
        the combined Aug 9-10 ground truth."""
        # Re-run, but with multiple calls to sync data
        # This tests the effect of online versus offline analysis and segmentation with potentially partial data

        dataFile = "emission/tests/data/real_examples/shankari_2016-08-10"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 9})
        end_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 10})
        cacheKey = "diary/trips-2016-08-10"
        # Ground truth spans both days ("2016-08-910" = Aug 9 + Aug 10).
        with open(
                "emission/tests/data/real_examples/shankari_2016-08-910.ground_truth"
        ) as gtf:
            ground_truth = json.load(gtf, object_hook=bju.object_hook)

        logging.info("Before loading, timeseries db size = %s" %
                     edb.get_timeseries_db().estimated_document_count())
        with open(dataFile) as df:
            all_entries = json.load(df, object_hook=bju.object_hook)
        # NOTE(review): arrow's `.timestamp` is used as a property here;
        # newer arrow versions changed it to a method — confirm the pinned
        # arrow version if this line breaks.
        ts_1030 = arrow.get("2016-08-10T10:30:00-07:00").timestamp
        logging.debug("ts_1030 = %s, converted back = %s" %
                      (ts_1030, arrow.get(ts_1030).to("America/Los_Angeles")))
        # Partition the day's entries into the two sync batches by their
        # metadata write timestamp.
        before_1030_entries = [
            e for e in all_entries
            if ad.AttrDict(e).metadata.write_ts <= ts_1030
        ]
        after_1030_entries = [
            e for e in all_entries
            if ad.AttrDict(e).metadata.write_ts > ts_1030
        ]

        # First load all data from the 9th. Otherwise, the missed trip is the first trip,
        # and we don't set the last_ts_processed
        # See the code around "logging.debug("len(segmentation_points) == 0, early return")"
        etc.setupRealExample(
            self, "emission/tests/data/real_examples/shankari_2016-08-09")

        # Sync at 10:30 to capture all the points on the trip *to* the optometrist
        # Skip the last few points to ensure that the trip end is skipped
        self.entries = before_1030_entries[0:-2]
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        # This intermediate result is overwritten below; presumably it just
        # exercises the query path on the partial data — confirm.
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Then sync after 10:30
        self.entries = after_1030_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)
        self.persistGroundTruthIfNeeded(api_result, dataFile, start_ld,
                                        cacheKey)

        # Although we process the day's data in two batches, we should get the same result
        self.compare_approx_result(ad.AttrDict({
            'result': api_result
        }).result,
                                   ad.AttrDict(ground_truth).data,
                                   time_fuzz=60,
                                   distance_fuzz=100)
    def testSunilShortTrips(self):
        """Run intake on the sunil 2016-07-27 example and expect the diary
        API to return no trips for that day."""
        dataFile = "emission/tests/data/real_examples/sunil_2016-07-27"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 27})
        cacheKey = "diary/trips-2016-07-27"
        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)

        result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
        self.assertEqual(result, [])
    def testAirTripHawaiiEnd(self):
        """Run intake on the 2016-08-04 example (day ends with an air trip
        to Hawaii) and compare the API result against its ground truth."""
        dataFile = "emission/tests/data/real_examples/shankari_2016-08-04"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 4})
        cacheKey = "diary/trips-2016-07-27"
        # Fix: close the ground-truth file deterministically instead of
        # leaking the handle from a bare open() call.
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
    def testSunilShortTrips(self):
        """Run intake on the sunil 2016-07-27 example and expect the diary
        API to return no trips for that day."""
        dataFile = "emission/tests/data/real_examples/sunil_2016-07-27"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 27})
        cacheKey = "diary/trips-2016-07-27"
        # Fix: removed the ground_truth load — it was never used (the test
        # only asserts an empty result) and it leaked a file handle.

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
        self.assertEqual(api_result, [])
Пример #18
0
    def testOct07MultiSyncSpuriousEndDetected(self):
        """Feed the Oct 07 issue-436 data in two sync batches split at
        ~18:33 and verify the final API result approximately matches the
        ground truth despite the spurious trip end in the first batch."""
        # Re-run, but with multiple calls to sync data
        # This tests the effect of online versus offline analysis and segmentation with potentially partial data

        dataFile = "emission/tests/data/real_examples/issue_436_assertion_error"
        # Fix: use plain decimal 7 for the day. The original octal literals
        # (0o7) evaluated to the same value but read like a bug — an
        # artifact of porting a zero-padded python2 "07".
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 10, 'day': 7})
        end_ld = ecwl.LocalDate({'year': 2016, 'month': 10, 'day': 7})
        cacheKey = "diary/trips-2016-10-07"
        with open(dataFile + ".ground_truth") as gtf:
            ground_truth = json.load(gtf, object_hook=bju.object_hook)

        logging.info("Before loading, timeseries db size = %s" %
                     edb.get_timeseries_db().estimated_document_count())
        with open(dataFile) as df:
            all_entries = json.load(df, object_hook=bju.object_hook)
        # 18:01 because the transition was at 2016-02-22T18:00:09.623404-08:00, so right after
        # 18:00
        ts_1800 = arrow.get("2016-10-07T18:33:11-07:00").timestamp
        logging.debug("ts_1800 = %s, converted back = %s" %
                      (ts_1800, arrow.get(ts_1800).to("America/Los_Angeles")))
        # Partition the entries into the two sync batches by write time.
        before_1800_entries = [
            e for e in all_entries
            if ad.AttrDict(e).metadata.write_ts <= ts_1800
        ]
        after_1800_entries = [
            e for e in all_entries
            if ad.AttrDict(e).metadata.write_ts > ts_1800
        ]

        # Sync at 18:00 to capture all the points on the trip *to* the optometrist
        # Skip the last few points to ensure that the trip end is skipped
        etc.createAndFillUUID(self)
        self.entries = before_1800_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Then sync after 18:00
        self.entries = after_1800_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)
        self.persistGroundTruthIfNeeded(api_result, dataFile, start_ld,
                                        cacheKey)

        # Although we process the day's data in two batches, we should get the same result
        self.compare_approx_result(ad.AttrDict({
            'result': api_result
        }).result,
                                   ad.AttrDict(ground_truth).data,
                                   time_fuzz=60,
                                   distance_fuzz=100)
    def testTsMismatch(self):
        """Test for https://github.com/e-mission/e-mission-server/issues/457"""
        dataFile = "emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 12, 'day': 12})
        cacheKey = "diary/trips-2016-12-12"
        # Fix: reuse dataFile instead of repeating the path literal, and
        # close the ground-truth file instead of leaking the handle.
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
    def testJumpSmoothingSectionStart(self):
        """Run intake on the independence-day variant with a jump at the bus
        section start and compare against the base example's ground truth."""
        dataFile = "emission/tests/data/real_examples/shankari_2016-independence_day_jump_bus_start"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 15})
        cacheKey = "diary/trips-2016-08-15"
        # The ground truth deliberately comes from the base independence_day
        # example, not from dataFile itself. Fix: close the file handle
        # instead of leaking it.
        with open("emission/tests/data/real_examples/shankari_2016-independence_day.ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
    def testOverriddenModeHack(self):
        """Test for https://github.com/e-mission/e-mission-server/issues/457"""
        dataFile = "emission/tests/data/real_examples/test_overriden_mode_hack.jul-31"
        start_ld = ecwl.LocalDate({'year': 2017, 'month': 7, 'day': 31})
        cacheKey = "diary/trips-2017-07-31"
        # Fix: reuse dataFile instead of repeating the path literal, and
        # close the ground-truth file instead of leaking the handle.
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
    def testJumpSmoothingSectionStart(self):
        """Run intake on the independence-day variant with a jump at the bus
        section start and compare against the base example's ground truth."""
        dataFile = "emission/tests/data/real_examples/shankari_2016-independence_day_jump_bus_start"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 15})
        cacheKey = "diary/trips-2016-08-15"
        # The ground truth deliberately comes from the base independence_day
        # example, not from dataFile itself. Fix: close the file handle
        # instead of leaking it.
        with open("emission/tests/data/real_examples/shankari_2016-independence_day.ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
    def testOverriddenModeHack(self):
        """Test for https://github.com/e-mission/e-mission-server/issues/457"""
        dataFile = "emission/tests/data/real_examples/test_overriden_mode_hack.jul-31"
        start_ld = ecwl.LocalDate({'year': 2017, 'month': 7, 'day': 31})
        cacheKey = "diary/trips-2017-07-31"
        # Fix: reuse dataFile instead of repeating the path literal, and
        # close the ground-truth file instead of leaking the handle.
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
    def testJan16SpeedAssert(self):
        """Test for https://github.com/e-mission/e-mission-server/issues/457"""
        dataFile = "emission/tests/data/real_examples/another_speed_assertion_failure.jan-16"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 1, 'day': 16})
        cacheKey = "diary/trips-2016-01-16"
        # Fix: reuse dataFile instead of repeating the path literal, and
        # close the ground-truth file instead of leaking the handle.
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
    def testJan16SpeedAssert(self):
        """Test for https://github.com/e-mission/e-mission-server/issues/457"""
        dataFile = "emission/tests/data/real_examples/another_speed_assertion_failure.jan-16"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 1, 'day': 16})
        cacheKey = "diary/trips-2016-01-16"
        # Fix: reuse dataFile instead of repeating the path literal, and
        # close the ground-truth file instead of leaking the handle.
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
    def testTsMismatch(self):
        """Regression test using the single-positional-indexer example day.

        # NOTE(review): the issue link below is the same as testJan16SpeedAssert;
        # confirm it is the intended reference for the ts mismatch case.
        Test for https://github.com/e-mission/e-mission-server/issues/457
        """
        dataFile = "emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 12, 'day': 12})
        cacheKey = "diary/trips-2016-12-12"
        # Derive the ground truth path from dataFile and close the handle
        # promptly instead of leaking it.
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
    def testIosJumpsAndUntrackedSquishing(self):
        """Regression test for commit a2c0ee4e3ceafa822425ceef299dcdb01c9b32c9."""
        dataFile = "emission/tests/data/real_examples/sunil_2016-07-20"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 20})
        cacheKey = "diary/trips-2016-07-20"
        # Derive the ground truth path from dataFile and close the handle
        # promptly instead of leaking it.
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
Пример #28
0
    def testUserInputRealData(self):
        """End-to-end check that manual mode/purpose confirmations attach to trips.

        Loads raw data, runs the intake pipeline, inserts the user-input
        entries, then verifies that each cleaned trip picks up the expected
        mode and purpose labels.
        """
        np.random.seed(61297777)
        dataFile = "emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12"
        etc.setupRealExample(self, dataFile)
        self.testUserId = self.testUUID
        # At this point, we have only raw data, no trips
        etc.runIntakePipeline(self.testUUID)
        # At this point, we have trips

        # Let's retrieve them
        ts = esta.TimeSeries.get_time_series(self.testUUID)
        ct_df = ts.get_data_df("analysis/cleaned_trip", time_query=None)
        self.assertEqual(len(ct_df), 4)

        # Load the mode_confirm and purpose_confirm objects, closing the file
        # handles promptly instead of leaking them.
        with open(dataFile + ".mode_confirm") as mcfp:
            mode_confirm_list = json.load(mcfp, object_hook=bju.object_hook)
        self.assertEqual(len(mode_confirm_list), 5)

        with open(dataFile + ".purpose_confirm") as pcfp:
            purpose_confirm_list = json.load(pcfp, object_hook=bju.object_hook)
        self.assertEqual(len(purpose_confirm_list), 7)

        for mc in mode_confirm_list:
            mc["user_id"] = self.testUUID
            ts.insert(mc)

        for pc in purpose_confirm_list:
            pc["user_id"] = self.testUUID
            ts.insert(pc)

        mc_label_list = []
        pc_label_list = []
        for trip_id in ct_df._id:
            # BUGFIX: look up the input for the current trip (trip_id), not
            # always the first trip (ct_df._id[0]). The loop variable was
            # previously unused, so the old code only passed because every
            # trip happens to carry the same labels.
            mc = esdt.get_user_input_for_trip(esda.CLEANED_TRIP_KEY,
                                              self.testUserId, trip_id,
                                              "manual/mode_confirm")
            mc_label_list.append(mc.data.label)

            pc = esdt.get_user_input_for_trip(esda.CLEANED_TRIP_KEY,
                                              self.testUserId, trip_id,
                                              "manual/purpose_confirm")
            pc_label_list.append(pc.data.label)

        self.assertEqual(mc_label_list, 4 * ['bike'])
        self.assertEqual(pc_label_list, 4 * ['pick_drop'])
    def testFeb22MultiSyncEndNotDetected(self):
        """Re-run the Feb 22 iPhone example, feeding the data in two sync batches.

        This tests the effect of online versus offline analysis and
        segmentation with potentially partial data: the first batch omits the
        last few points so the trip end is not detected, the second batch
        completes it, and the combined result should approximately match the
        single-batch ground truth.
        """
        dataFile = "emission/tests/data/real_examples/iphone_2016-02-22"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 2, 'day': 22})
        end_ld = ecwl.LocalDate({'year': 2016, 'month': 2, 'day': 22})
        cacheKey = "diary/trips-2016-02-22"
        # Use context managers so the JSON file handles are not leaked.
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        logging.info("Before loading, timeseries db size = %s" %
                     edb.get_timeseries_db().count())
        with open(dataFile) as dfp:
            all_entries = json.load(dfp, object_hook=bju.object_hook)
        # 18:01 because the transition was at 2016-02-22T18:00:09.623404-08:00, so right after
        # 18:00
        ts_1800 = arrow.get("2016-02-22T18:00:30-08:00").timestamp
        logging.debug("ts_1800 = %s, converted back = %s" %
                      (ts_1800, arrow.get(ts_1800).to("America/Los_Angeles")))
        before_1800_entries = [
            e for e in all_entries
            if ad.AttrDict(e).metadata.write_ts <= ts_1800
        ]
        after_1800_entries = [
            e for e in all_entries
            if ad.AttrDict(e).metadata.write_ts > ts_1800
        ]

        # Sync at 18:00 to capture all the points on the trip *to* the optometrist
        # Skip the last few points to ensure that the trip end is skipped
        import uuid
        self.testUUID = uuid.uuid4()
        self.entries = before_1800_entries[0:-2]
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Then sync after 18:00
        self.entries = after_1800_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Although we process the day's data in two batches, we should get the same result
        self.compare_approx_result(ad.AttrDict({
            'result': api_result
        }).result,
                                   ad.AttrDict(ground_truth).data,
                                   time_fuzz=60,
                                   distance_fuzz=100)
    def testIndexLengthChange(self):
        """Regression test for commit 94f67b4848611fa01c4327a0fa0cab97c2247744."""
        dataFile = "emission/tests/data/real_examples/shankari_2015-08-23"
        start_ld = ecwl.LocalDate({'year': 2015, 'month': 8, 'day': 23})
        cacheKey = "diary/trips-2015-08-23"
        # Derive the ground truth path from dataFile and close the handle
        # promptly instead of leaking it.
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
        # (Dropped a stale copy-pasted comment about "two batches"; this test
        # processes the data in a single batch.)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
    def testIosJumpsAndUntrackedSquishing(self):
        """Regression test for commit a2c0ee4e3ceafa822425ceef299dcdb01c9b32c9."""
        dataFile = "emission/tests/data/real_examples/sunil_2016-07-20"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 20})
        cacheKey = "diary/trips-2016-07-20"
        # Derive the ground truth path from dataFile and close the handle
        # promptly instead of leaking it.
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
    def testIndexLengthChange(self):
        """Regression test for commit 94f67b4848611fa01c4327a0fa0cab97c2247744."""
        dataFile = "emission/tests/data/real_examples/shankari_2015-08-23"
        start_ld = ecwl.LocalDate({'year': 2015, 'month': 8, 'day': 23})
        cacheKey = "diary/trips-2015-08-23"
        # Derive the ground truth path from dataFile and close the handle
        # promptly instead of leaking it.
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
    def testFeb22ShortTripsDistance(self):
        """Check the Feb 22 iphone_3 example's trips against ground truth."""
        dataFile = "emission/tests/data/real_examples/iphone_3_2016-02-22"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 2, 'day': 22})
        end_ld = ecwl.LocalDate({'year': 2016, 'month': 2, 'day': 22})
        cacheKey = "diary/trips-2016-02-22"
        # Use a context manager so the ground truth file handle is not leaked.
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # (Dropped a stale copy-pasted comment about "two batches"; this test
        # processes the data in a single batch.)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
Пример #34
0
 def setUp(self):
     """Load two real example days as two separate users and run intake on both."""
     etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-21")
     self.testUUID1 = self.testUUID
     etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
     for curr_uuid in (self.testUUID1, self.testUUID):
         etc.runIntakePipeline(curr_uuid)
     logging.info(
         "After loading, timeseries db size = %s" % edb.get_timeseries_db().count())
     # August 2015 window: 1438387200 = 2015-08-01T00:00Z,
     # 1441065600 = 2015-09-01T00:00Z
     self.aug_start_ts = 1438387200
     self.aug_end_ts = 1441065600
     self.day_start_dt = esdldq.get_local_date(self.aug_start_ts, "America/Los_Angeles")
     self.day_end_dt = esdldq.get_local_date(self.aug_end_ts, "America/Los_Angeles")
Пример #35
0
 def setUp(self):
     """Set up two users (Aug 21 and Aug 27 examples) and run intake for each."""
     aug21 = "emission/tests/data/real_examples/shankari_2015-aug-21"
     aug27 = "emission/tests/data/real_examples/shankari_2015-aug-27"
     etc.setupRealExample(self, aug21)
     self.testUUID1 = self.testUUID
     etc.setupRealExample(self, aug27)
     etc.runIntakePipeline(self.testUUID1)
     etc.runIntakePipeline(self.testUUID)
     logging.info(
         "After loading, timeseries db size = %s" % edb.get_timeseries_db().count())
     # August 2015 window: 1438387200 = 2015-08-01T00:00Z,
     # 1441065600 = 2015-09-01T00:00Z
     self.aug_start_ts = 1438387200
     self.aug_end_ts = 1441065600
     self.day_start_dt = esdldq.get_local_date(self.aug_start_ts, "America/Los_Angeles")
     self.day_end_dt = esdldq.get_local_date(self.aug_end_ts, "America/Los_Angeles")
    def testAllQuery(self):
        """With no filters, pointcount.query should return every user with points,
        including the one we just loaded."""
        dataFile = "emission/tests/data/real_examples/shankari_2016-06-20"
        ld = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        self.testUUIDList = [self.testUUID]
        unfiltered_spec = {
            "time_type": None,
            "modes": None,
            "sel_region": None
        }
        uuid_list = pointcount.query(unfiltered_spec)
        logging.debug("uuid_list = %s" % uuid_list)
        self.assertGreater(len(uuid_list), 1)
        self.assertIn(self.testUUID, uuid_list)
Пример #37
0
    def testAllQuery(self):
        """An unfiltered point count query must include our freshly loaded user."""
        dataFile = "emission/tests/data/real_examples/shankari_2016-06-20"
        ld = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        self.testUUIDList = [self.testUUID]
        # No time, mode, or region restriction -> everyone matches
        uuid_list = pointcount.query(
            {"time_type": None, "modes": None, "sel_region": None})
        logging.debug("uuid_list = %s" % uuid_list)
        self.assertGreater(len(uuid_list), 1)
        self.assertIn(self.testUUID, uuid_list)
    def testAug27TooMuchExtrapolation(self):
        """Regression test: the Aug 27 example should match ground truth without
        the pipeline over-extrapolating trip starts/ends."""
        dataFile = "emission/tests/data/real_examples/shankari_2015-aug-27"
        start_ld = ecwl.LocalDate({'year': 2015, 'month': 8, 'day': 27})
        end_ld = ecwl.LocalDate({'year': 2015, 'month': 8, 'day': 27})
        cacheKey = "diary/trips-2015-08-27"
        with open(dataFile + ".ground_truth") as gt_fp:
            ground_truth = json.load(gt_fp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        actual = ad.AttrDict({'result': api_result}).result
        expected = ad.AttrDict(ground_truth).data
        self.compare_result(actual, expected)
    def testIsMatchedUser(self):
        """Only the user with an air trip should match a spec requiring AIR_OR_HSR."""
        # Load data for the Bay Area
        dataFileba = "emission/tests/data/real_examples/shankari_2016-06-20"
        ldba = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})

        etc.setupRealExample(self, dataFileba)
        testUUIDba = self.testUUID
        etc.runIntakePipeline(testUUIDba)
        logging.debug("uuid for the bay area = %s " % testUUIDba)

        # Load data for Hawaii
        dataFilehi = "emission/tests/data/real_examples/shankari_2016-07-27"
        ldhi = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 27})

        etc.setupRealExample(self, dataFilehi)
        testUUIDhi = self.testUUID
        etc.runIntakePipeline(testUUIDhi)

        logging.debug("uuid for hawaii = %s " % testUUIDhi)

        self.testUUIDList = [testUUIDba, testUUIDhi]

        # Build the checks separately for readability: plenty of walking,
        # plus at least one air/high-speed-rail section
        walk_check = {
            "modes": ['WALKING', 'ON_FOOT'],
            "metric": "count",
            "threshold": {"$gt": 5}
        }
        air_check = {
            "modes": ['AIR_OR_HSR'],
            "metric": "count",
            "threshold": {"$gt": 1}
        }
        air_query_spec = {
            "time_type": "local_date",
            "from_local_date": {"year": 2016, "month": 2},
            "to_local_date": {"year": 2016, "month": 9},
            "freq": 'DAILY',
            "checks": [walk_check, air_check]
        }

        # Since this requires at least one air trip, this will only return the
        # hawaii trip
        self.assertTrue(tripmetrics.is_matched_user(testUUIDhi, air_query_spec))
        self.assertFalse(tripmetrics.is_matched_user(testUUIDba, air_query_spec))
Пример #40
0
    def testTimeQueryPos(self):
        """A local_date query covering the example day should return exactly our user."""
        dataFile = "emission/tests/data/real_examples/shankari_2016-06-20"
        ld = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        self.testUUIDList = [self.testUUID]

        query_spec = {
            "time_type": "local_date",
            "from_local_date": dict(ld),
            "to_local_date": dict(ld),
            "modes": None,
            "sel_region": None
        }
        uuid_list = pointcount.query(query_spec)
        logging.debug("uuid_list = %s" % uuid_list)
        self.assertEqual(uuid_list, [self.testUUID])
    def testTimeQueryPos(self):
        """Querying points restricted to the example day must match our test user."""
        dataFile = "emission/tests/data/real_examples/shankari_2016-06-20"
        example_day = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        self.testUUIDList = [self.testUUID]

        from_filter = dict(example_day)
        to_filter = dict(example_day)
        uuid_list = pointcount.query({
            "time_type": "local_date",
            "from_local_date": from_filter,
            "to_local_date": to_filter,
            "modes": None,
            "sel_region": None
        })
        logging.debug("uuid_list = %s" % uuid_list)
        self.assertEqual(uuid_list, [self.testUUID])
    def standardMatchDataGroundTruth(self, dataFile, ld, cacheKey):
        """Helper: run the intake pipeline on dataFile and compare the diary
        geojson for day `ld` against the .ground_truth file next to the data.

        :param dataFile: path prefix of the example; ground truth is read
            from dataFile + ".ground_truth"
        :param ld: LocalDate of the day to query
        :param cacheKey: cache key (kept for the commented-out cache checks)
        """
        with open(dataFile + ".ground_truth") as gt_fp:
            ground_truth = json.load(gt_fp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        # runIntakePipeline does not run the common trips, habitica or store views to cache
        # So let's manually store to the cache
        # tc_query = estt.TimeComponentQuery("data.star_local_dt", ld, ld)
        # enuah.UserCacheHandler.getUserCacheHandler(self.testUUID).storeTimelineToCache(tc_query)

        # cached_result = edb.get_usercache_db().find_one({'user_id': self.testUUID,
        #                                                  "metadata.key": cacheKey})
        api_result = gfc.get_geojson_for_dt(self.testUUID, ld, ld)

        # self.compare_result(cached_result, ground_truth)
        actual = ad.AttrDict({'result': api_result}).result
        expected = ad.AttrDict(ground_truth).data
        self.compare_result(actual, expected)
    def testAug27TooMuchExtrapolation(self):
        """Regression test: Aug 27 example should match ground truth without
        over-extrapolated trip boundaries."""
        dataFile = "emission/tests/data/real_examples/shankari_2015-aug-27"
        start_ld = ecwl.LocalDate({'year': 2015, 'month': 8, 'day': 27})
        end_ld = ecwl.LocalDate({'year': 2015, 'month': 8, 'day': 27})
        cacheKey = "diary/trips-2015-08-27"
        with open(dataFile + ".ground_truth") as gt_fp:
            ground_truth = json.load(gt_fp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data)
Пример #44
0
 def setUp(self):
     """Configure cleaned-section analysis output, load two example days as two
     users, and run the intake pipeline for both."""
     self.analysis_conf_path = \
         etc.set_analysis_config("analysis.result.section.key", "analysis/cleaned_section")
     etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-21")
     self.testUUID1 = self.testUUID
     etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
     for curr_uuid in (self.testUUID1, self.testUUID):
         etc.runIntakePipeline(curr_uuid)
     logging.info("After loading, timeseries db size = %s" %
                  edb.get_timeseries_db().estimated_document_count())
     # August 2015 window: 1438387200 = 2015-08-01T00:00Z,
     # 1441065600 = 2015-09-01T00:00Z
     self.aug_start_ts = 1438387200
     self.aug_end_ts = 1441065600
     self.day_start_dt = ecwl.LocalDate.get_local_date(self.aug_start_ts, "America/Los_Angeles")
     self.day_end_dt = ecwl.LocalDate.get_local_date(self.aug_end_ts, "America/Los_Angeles")
Пример #45
0
 def setUp(self):
     """Load the Aug 10 example, run intake, then roll back the mode inference
     results so ModeInferencePipeline can be exercised in isolation."""
     # Thanks to M&J for the number!
     np.random.seed(61297777)
     self.copied_model_path = etc.copy_dummy_seed_for_inference()
     dataFile = "emission/tests/data/real_examples/shankari_2016-08-10"
     start_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 9})
     end_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 10})
     cacheKey = "diary/trips-2016-08-10"
     etc.setupRealExample(self, dataFile)
     etc.runIntakePipeline(self.testUUID)
     # The default intake pipeline already includes mode inference, which is
     # correct in general but would make these tests run that step twice.
     # Roll back its results so the inference pipeline can be tested cleanly.
     pipeline.del_objects_after(self.testUUID, 0, is_dry_run=False)
     self.pipeline = pipeline.ModeInferencePipeline()
     self.pipeline.loadModelStage()
Пример #46
0
 def setUp(self):
     """Load the Aug 10 example, run intake, then reset the mode inference
     stage so ModeInferencePipeline can be tested from a clean state."""
     # Thanks to M&J for the number!
     np.random.seed(61297777)
     self.copied_model_path = etc.copy_dummy_seed_for_inference()
     dataFile = "emission/tests/data/real_examples/shankari_2016-08-10"
     start_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 9})
     end_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 10})
     cacheKey = "diary/trips-2016-08-10"
     etc.setupRealExample(self, dataFile)
     etc.runIntakePipeline(self.testUUID)
     # The default intake pipeline already includes mode inference, which is
     # correct in general but would make these tests run that step twice.
     # Roll back its results so the inference pipeline can be tested cleanly.
     modereset.del_objects_after(self.testUUID, 0, is_dry_run=False)
     self.pipeline = pipeline.ModeInferencePipeline()
     self.pipeline.loadModelStage()
Пример #47
0
    def testUserInputRealData(self):
        """End-to-end check that manual mode/purpose confirmations attach to trips.

        Loads raw data, runs the intake pipeline, inserts the user-input
        entries, then verifies that each cleaned trip picks up the expected
        mode and purpose labels.
        """
        np.random.seed(61297777)
        dataFile = "emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12"
        etc.setupRealExample(self, dataFile)
        self.testUserId = self.testUUID
        # At this point, we have only raw data, no trips
        etc.runIntakePipeline(self.testUUID)
        # At this point, we have trips

        # Let's retrieve them
        ts = esta.TimeSeries.get_time_series(self.testUUID)
        ct_df = ts.get_data_df("analysis/cleaned_trip", time_query=None)
        self.assertEqual(len(ct_df), 4)

        # Load the mode_confirm and purpose_confirm objects, closing the file
        # handles promptly instead of leaking them.
        with open(dataFile + ".mode_confirm") as mcfp:
            mode_confirm_list = json.load(mcfp, object_hook=bju.object_hook)
        self.assertEqual(len(mode_confirm_list), 5)

        with open(dataFile + ".purpose_confirm") as pcfp:
            purpose_confirm_list = json.load(pcfp, object_hook=bju.object_hook)
        self.assertEqual(len(purpose_confirm_list), 7)

        for mc in mode_confirm_list:
            mc["user_id"] = self.testUUID
            ts.insert(mc)

        for pc in purpose_confirm_list:
            pc["user_id"] = self.testUUID
            ts.insert(pc)

        mc_label_list = []
        pc_label_list = []
        for trip_id in ct_df._id:
            # BUGFIX: look up the input for the current trip (trip_id), not
            # always the first trip (ct_df._id[0]). The loop variable was
            # previously unused, so the old code only passed because every
            # trip happens to carry the same labels.
            mc = esdt.get_user_input_for_trip(esda.CLEANED_TRIP_KEY,
                        self.testUserId, trip_id, "manual/mode_confirm")
            mc_label_list.append(mc.data.label)

            pc = esdt.get_user_input_for_trip(esda.CLEANED_TRIP_KEY,
                        self.testUserId, trip_id, "manual/purpose_confirm")
            pc_label_list.append(pc.data.label)

        self.assertEqual(mc_label_list, 4 * ['bike'])
        self.assertEqual(pc_label_list, 4 * ['pick_drop'])
    def testTimeQueryNeg(self):
        """Per-day queries should only return the user whose points fall in that day."""
        # Load data for the 20th
        dataFile20 = "emission/tests/data/real_examples/shankari_2016-06-20"
        ld20 = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})

        etc.setupRealExample(self, dataFile20)
        testUUID20 = self.testUUID
        etc.runIntakePipeline(testUUID20)

        # Load data for the 21st
        dataFile21 = "emission/tests/data/real_examples/shankari_2016-06-21"
        ld21 = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 21})

        etc.setupRealExample(self, dataFile21)
        testUUID21 = self.testUUID
        etc.runIntakePipeline(testUUID21)

        self.testUUIDList = [testUUID20, testUUID21]

        def single_day_query(day_ld):
            # Point count query restricted to exactly one local day
            return pointcount.query({
                "time_type": "local_date",
                "from_local_date": dict(day_ld),
                "to_local_date": dict(day_ld),
                "modes": None,
                "sel_region": None
            })

        uuid_list20 = single_day_query(ld20)
        logging.debug("uuid_list for the 20th = %s" % uuid_list20)
        # We should only get uuid from the 20th back
        self.assertEqual(uuid_list20, [testUUID20])

        uuid_list21 = single_day_query(ld21)
        logging.debug("uuid_list for the 21st = %s" % uuid_list21)
        # We should only get uuid from the 21st back
        self.assertEqual(uuid_list21, [testUUID21])
Пример #49
0
    def testTimeQueryNeg(self):
        """Each single-day query must return only the user loaded for that day."""
        # Load data for the 20th
        dataFile20 = "emission/tests/data/real_examples/shankari_2016-06-20"
        ld20 = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})

        etc.setupRealExample(self, dataFile20)
        testUUID20 = self.testUUID
        etc.runIntakePipeline(testUUID20)

        # Load data for the 21st
        dataFile21 = "emission/tests/data/real_examples/shankari_2016-06-21"
        ld21 = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 21})

        etc.setupRealExample(self, dataFile21)
        testUUID21 = self.testUUID
        etc.runIntakePipeline(testUUID21)

        self.testUUIDList = [testUUID20, testUUID21]

        spec20 = {
            "time_type": "local_date",
            "from_local_date": dict(ld20),
            "to_local_date": dict(ld20),
            "modes": None,
            "sel_region": None
        }
        uuid_list20 = pointcount.query(spec20)
        logging.debug("uuid_list for the 20th = %s" % uuid_list20)

        # We should only get uuid from the 20th back
        self.assertEqual(uuid_list20, [testUUID20])

        spec21 = {
            "time_type": "local_date",
            "from_local_date": dict(ld21),
            "to_local_date": dict(ld21),
            "modes": None,
            "sel_region": None
        }
        uuid_list21 = pointcount.query(spec21)
        logging.debug("uuid_list for the 21st = %s" % uuid_list21)

        # We should only get uuid from the 21st back
        self.assertEqual(uuid_list21, [testUUID21])
    def testAug10MultiSyncEndDetected(self):
        """Re-run the Aug 9/10 example, but with multiple calls to sync data.

        This tests the effect of online versus offline analysis and
        segmentation with potentially partial data; the combined result
        should approximately match the two-day ground truth.
        """
        dataFile = "emission/tests/data/real_examples/shankari_2016-08-10"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 9})
        end_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 10})
        cacheKey = "diary/trips-2016-08-10"
        # NOTE(review): the "2016-08-910" file appears to be the combined ground
        # truth for the 9th and the 10th -- confirm against the data directory.
        # Use context managers so the JSON file handles are not leaked.
        with open("emission/tests/data/real_examples/shankari_2016-08-910.ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        logging.info("Before loading, timeseries db size = %s" % edb.get_timeseries_db().count())
        with open(dataFile) as secondfp:
            all_entries = json.load(secondfp, object_hook = bju.object_hook)
        ts_1030 = arrow.get("2016-08-10T10:30:00-07:00").timestamp
        logging.debug("ts_1030 = %s, converted back = %s" % (ts_1030, arrow.get(ts_1030).to("America/Los_Angeles")))
        before_1030_entries = [e for e in all_entries if ad.AttrDict(e).metadata.write_ts <= ts_1030]
        after_1030_entries = [e for e in all_entries if ad.AttrDict(e).metadata.write_ts > ts_1030]

        # First load all data from the 9th. Otherwise, the missed trip is the first trip,
        # and we don't set the last_ts_processed
        # See the code around "logging.debug("len(segmentation_points) == 0, early return")"
        etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2016-08-09")

        # Sync at 10:30 to capture all the points on the trip *to* the optometrist
        self.entries = before_1030_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Then sync after 10:30
        self.entries = after_1030_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Although we process the day's data in two batches, we should get the same result
        self.compare_approx_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data, time_fuzz=60, distance_fuzz=100)
Пример #51
0
    def checkConfirmedTripsAndSections(self, dataFile, ld, preload=False):
        """Helper: run intake over dataFile plus its .user_inputs entries and
        compare the resulting confirmed trips with .expected_confirmed_trips.

        :param dataFile: path prefix; .ground_truth, .user_inputs and
            .expected_confirmed_trips files are derived from it
        :param ld: LocalDate of the example day (not used directly here)
        :param preload: if True, insert the user inputs before the single
            pipeline run; if False, run the pipeline first, then insert the
            inputs and run it a second time
        """
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        if (preload):
            # Use a context manager so the user-inputs handle is not leaked
            with open(dataFile + ".user_inputs") as uifp:
                self.entries = json.load(uifp, object_hook=bju.object_hook)
            etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        if (not preload):
            with open(dataFile + ".user_inputs") as uifp:
                self.entries = json.load(uifp, object_hook=bju.object_hook)
            etc.setupRealExampleWithEntries(self)
            etc.runIntakePipeline(self.testUUID)
        ts = esta.TimeSeries.get_time_series(self.testUUID)
        confirmed_trips = list(
            ts.find_entries(["analysis/confirmed_trip"], None))
        with open(dataFile + ".expected_confirmed_trips") as dect:
            expected_confirmed_trips = json.load(dect,
                                                 object_hook=bju.object_hook)
            self.compare_trip_result(confirmed_trips, expected_confirmed_trips)
    def testResetToPast(self):
        """
        - Load data for both days
        - Run pipelines
        - Verify that all is well
        - Reset to a date before both
        - Verify that analysis data for the both days is removed
        - Re-run pipelines
        - Verify that all is well
        """
        # Load all data
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        # Use context managers so the JSON file handles are not leaked
        with open(dataFile_1 + ".ground_truth") as gfp_1:
            ground_truth_1 = json.load(gfp_1, object_hook=bju.object_hook)
        with open(dataFile_2 + ".ground_truth") as gfp_2:
            ground_truth_2 = json.load(gfp_2, object_hook=bju.object_hook)

        # Run both pipelines
        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        with open(dataFile_2) as dfp_2:
            self.entries = json.load(dfp_2, object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        # Verify that all is well
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)

        # Reset to a date well before the two days
        # NOTE(review): the reset date is in 2015 while the data is from 2016;
        # presumably intentional ("well before") -- confirm
        reset_ts = arrow.get("2015-07-24").timestamp
        epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

        # Data should be completely deleted
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.assertEqual(api_result, [])

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.assertEqual(api_result, [])

        # Re-running the pipeline again
        etc.runIntakePipeline(self.testUUID)

        # Should reconstruct everything
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)
    def testResetToStart(self):
        """
        - Load data for both days
        - Run pipelines
        - Verify that all is well
        - Reset to start
        - Verify that there is no analysis data
        - Re-run pipelines
        - Verify that all is well
        """

        # Input files, query dates and ground truth for the two test days
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        ground_truth_1 = json.load(open(dataFile_1+".ground_truth"), object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2+".ground_truth"), object_hook=bju.object_hook)

        def check_days_match_ground_truth():
            # Each day's API result must match its stored ground truth
            for query_ld, truth in ((start_ld_1, ground_truth_1),
                                    (start_ld_2, ground_truth_2)):
                day_result = gfc.get_geojson_for_dt(self.testUUID, query_ld, query_ld)
                self.compare_result(ad.AttrDict({'result': day_result}).result,
                                    ad.AttrDict(truth).data)

        # Run the pipeline over both days' data
        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook = bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        # Check results: so far, so good
        check_days_match_ground_truth()

        # Reset pipeline to start
        epr.reset_user_to_start(self.testUUID, is_dry_run=False)

        # Now there are no results for either day
        for query_ld in (start_ld_1, start_ld_2):
            self.assertEqual(
                gfc.get_geojson_for_dt(self.testUUID, query_ld, query_ld), [])

        # Re-run the pipeline again
        etc.runIntakePipeline(self.testUUID)

        # Should be back to ground truth
        check_days_match_ground_truth()
Пример #54
0
    def setUp(self):
        """Load the 2015-aug-27 real example, run the intake pipeline, and
        build day-granularity local-date bounds for the example day."""
        etc.setupRealExample(self,
                             "emission/tests/data/real_examples/shankari_2015-aug-27")
        # eaicf.filter_accuracy(self.testUUID)
        etc.runIntakePipeline(self.testUUID)
        # estfm.move_all_filters_to_data()
        logging.info("After loading, timeseries db size = %s"
                     % edb.get_timeseries_db().count())
        self.day_start_ts = 1440658800
        self.day_end_ts = 1440745200
        tz = "America/Los_Angeles"
        self.day_start_dt = esdldq.get_local_date(self.day_start_ts, tz)
        self.day_end_dt = esdldq.get_local_date(self.day_end_ts, tz)

        # If we don't delete the time components, we end up with the upper and
        # lower bounds = 0, which basically matches nothing.
        for component in ('hour', 'minute', 'second'):
            del self.day_start_dt[component]
            del self.day_end_dt[component]
    def testOct07MultiSyncSpuriousEndDetected(self):
        """Re-run the issue 436 data, but with multiple calls to sync data.

        This tests the effect of online versus offline analysis and
        segmentation with potentially partial data: splitting the day's
        entries across two pipeline runs must yield (approximately) the same
        diary as processing them in one pass.
        """
        dataFile = "emission/tests/data/real_examples/issue_436_assertion_error"
        # BUGFIX: 'day': 07 was a Python 2 leading-zero literal; it is a
        # SyntaxError in Python 3. Plain 7 is the same value.
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 10, 'day': 7})
        end_ld = ecwl.LocalDate({'year': 2016, 'month': 10, 'day': 7})
        cacheKey = "diary/trips-2016-10-07"
        ground_truth = json.load(open(dataFile+".ground_truth"), object_hook=bju.object_hook)

        logging.info("Before loading, timeseries db size = %s" % edb.get_timeseries_db().count())
        all_entries = json.load(open(dataFile), object_hook = bju.object_hook)
        # 18:01 because the transition was at 2016-02-22T18:00:09.623404-08:00, so right after
        # 18:00
        ts_1800 = arrow.get("2016-10-07T18:33:11-07:00").timestamp
        logging.debug("ts_1800 = %s, converted back = %s" % (ts_1800, arrow.get(ts_1800).to("America/Los_Angeles")))
        before_1800_entries = [e for e in all_entries if ad.AttrDict(e).metadata.write_ts <= ts_1800]
        after_1800_entries = [e for e in all_entries if ad.AttrDict(e).metadata.write_ts > ts_1800]

        # Sync at 18:00 to capture all the points on the trip *to* the optometrist
        # Skip the last few points to ensure that the trip end is skipped
        import uuid
        self.testUUID = uuid.uuid4()
        self.entries = before_1800_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        # Exercise the retrieval mid-way through; only the final result below
        # is compared against ground truth (the earlier value was unused).
        gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Then sync after 18:00
        self.entries = after_1800_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Although we process the day's data in two batches, we should get the same result
        self.compare_approx_result(ad.AttrDict({'result': api_result}).result,
                                   ad.AttrDict(ground_truth).data, time_fuzz=60, distance_fuzz=100)
    def testJun21(self):
        """More complex day. Tests:
        PR #357 (spurious trip at 14:00 should be segmented and skipped)
        PR #358 (trip back from bella's house at 16:00)
        """
        dataFile = "emission/tests/data/real_examples/shankari_2016-06-21"
        ld = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 21})
        cacheKey = "diary/trips-2016-06-21"
        with open(dataFile + ".ground_truth") as gtf:
            ground_truth = json.load(gtf, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        etc.runIntakePipeline(self.testUUID)
        # runIntakePipeline does not run the common trips, habitica or store views to cache
        # So let's manually store to the cache
        # tc_query = estt.TimeComponentQuery("data.star_local_dt", ld, ld)
        # enuah.UserCacheHandler.getUserCacheHandler(self.testUUID).storeTimelineToCache(tc_query)

        # cached_result = edb.get_usercache_db().find_one({'user_id': self.testUUID,
        #                                                  "metadata.key": cacheKey})
        api_result = gfc.get_geojson_for_dt(self.testUUID, ld, ld)

        # self.compare_result(cached_result, ground_truth)
        expected = ad.AttrDict(ground_truth).data
        actual = ad.AttrDict({'result': api_result}).result
        self.compare_result(actual, expected)
    def testResetToFuture(self):
        """
        - Load data for both days
        - Run pipelines
        - Reset to a date after the two
        - Verify that all is well
        - Re-run pipelines and ensure that there are no errors
        """
        # Input files, query dates and ground truth for the two test days
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        ground_truth_1 = json.load(open(dataFile_1+".ground_truth"), object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2+".ground_truth"), object_hook=bju.object_hook)

        # Run the pipeline over both days' data
        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook = bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        # Reset to a date well after the two days
        reset_ts = arrow.get("2017-07-24").timestamp
        epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

        # Data should be untouched because of early return
        for query_ld, truth in ((start_ld_1, ground_truth_1),
                                (start_ld_2, ground_truth_2)):
            day_result = gfc.get_geojson_for_dt(self.testUUID, query_ld, query_ld)
            self.compare_result(ad.AttrDict({'result': day_result}).result,
                                ad.AttrDict(truth).data)

        # Re-running the pipeline again should not affect anything
        etc.runIntakePipeline(self.testUUID)
    def testGeoQuery(self):
        """Verify that pointcount.query returns only the users whose points
        fall inside the selected polygon (and time range, when supplied)."""
        # Load and process data for the Bay Area
        dataFileba = "emission/tests/data/real_examples/shankari_2016-06-20"
        ldba = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})

        etc.setupRealExample(self, dataFileba)
        testUUIDba = self.testUUID
        etc.runIntakePipeline(testUUIDba)
        logging.debug("uuid for the bay area = %s " % testUUIDba)

        # Load and process data for Hawaii
        dataFilehi = "emission/tests/data/real_examples/shankari_2016-07-27"
        ldhi = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 27})

        etc.setupRealExample(self, dataFilehi)
        testUUIDhi = self.testUUID
        etc.runIntakePipeline(testUUIDhi)
        logging.debug("uuid for hawaii = %s " % testUUIDhi)

        self.testUUIDList = [testUUIDba, testUUIDhi]

        # Closed rings (first point repeated last) around each test region
        bay_area_ring = [
            [-122.0731149, 37.4003834],
            [-122.07302, 37.3804759],
            [-122.1232527, 37.4105125],
            [-122.1101028, 37.4199638],
            [-122.0731149, 37.4003834],
        ]
        hawaii_ring = [
            [-157.9614841, 21.3631988],
            [-157.9267982, 21.3780131],
            [-157.7985052, 21.279961],
            [-157.8047025, 21.2561483],
            [-157.9614841, 21.3631988],
        ]

        # Query the Bay Area polygon with a local-date time filter
        uuid_listba = pointcount.query({
            "time_type": "local_date",
            "from_local_date": {'year': 2016, 'month': 5, 'day': 20},
            "to_local_date": {'year': 2016, 'month': 10, 'day': 20},
            "modes": None,
            "sel_region": {
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [bay_area_ring]
                }
            }
        })
        logging.debug("uuid_list for the bay area = %s" % uuid_listba)

        # We should only get the Bay Area uuid back
        self.assertEqual(uuid_listba, [testUUIDba])

        # Query the Hawaii polygon with no time filter
        uuid_listhi = pointcount.query({
            "time_type": None,
            "modes": None,
            "sel_region": {
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [hawaii_ring]
                }
            }
        })
        logging.debug("uuid_list for hawaii = %s" % uuid_listhi)

        # We should only get the Hawaii uuid back
        self.assertEqual(uuid_listhi, [testUUIDhi])