def testJul22SplitAroundReboot(self):
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        ground_truth_1 = json.load(open(dataFile_1 + ".ground_truth"),
                                   object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2 + ".ground_truth"),
                                   object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1,
                                            start_ld_1)
        # Although we process the day's data in two batches, we should get the same result
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        # Although we process the day's data in two batches, we should get the same result
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_2).data)
    def testZeroDurationPlaceInterpolationSingleSync(self):
        # Test for 545114feb5ac15caac4110d39935612525954b71
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-01-12"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-01-13"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 1, 'day': 12})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 1, 'day': 13})
        cacheKey_1 = "diary/trips-2016-01-12"
        cacheKey_2 = "diary/trips-2016-01-13"
        ground_truth_1 = json.load(open(dataFile_1 + ".ground_truth"),
                                   object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2 + ".ground_truth"),
                                   object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile_1)
        self.entries = json.load(open(dataFile_2), object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1,
                                            start_ld_1)
        # Although we process the day's data in two batches, we should get the same result
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        # Although we process the day's data in two batches, we should get the same result
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_2).data)
    def testZeroDurationPlaceInterpolationMultiSync(self):
        # Test for 545114feb5ac15caac4110d39935612525954b71
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-01-12"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-01-13"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 1, 'day': 12})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 1, 'day': 13})
        cacheKey_1 = "diary/trips-2016-01-12"
        cacheKey_2 = "diary/trips-2016-01-13"
        ground_truth_1 = json.load(open(dataFile_1+".ground_truth"), object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2+".ground_truth"), object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook = bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        # Although we process the day's data in two batches, we should get the same result
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        # Although we process the day's data in two batches, we should get the same result
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)
    def testJul22SplitAroundReboot(self):
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        ground_truth_1 = json.load(open(dataFile_1+".ground_truth"), object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2+".ground_truth"), object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook = bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        # Although we process the day's data in two batches, we should get the same result
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        # Although we process the day's data in two batches, we should get the same result
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)
    def testResetToStart(self):
        """
        - Load data for both days
        - Run pipelines
        - Verify that all is well
        - Reset to start
        - Verify that there is no analysis data
        - Re-run pipelines
        - Verify that all is well
        """

        # Load all data
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        ground_truth_1 = json.load(open(dataFile_1+".ground_truth"), object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2+".ground_truth"), object_hook=bju.object_hook)

        # Run both pipelines
        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook = bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        # Check results: so far, so good
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)

        # Reset pipeline to start
        epr.reset_user_to_start(self.testUUID, is_dry_run=False)

        # Now there are no results
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.assertEqual(api_result, [])

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.assertEqual(api_result, [])

        # Re-run the pipeline again
        etc.runIntakePipeline(self.testUUID)

        # Should be back to ground truth
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)
    def testResetToPast(self):
        """
        - Load data for both days
        - Run pipelines
        - Verify that all is well
        - Reset to a date before both
        - Verify that analysis data for the both days is removed
        - Re-run pipelines
        - Verify that all is well
        """
        # Load all data
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        ground_truth_1 = json.load(open(dataFile_1+".ground_truth"), object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2+".ground_truth"), object_hook=bju.object_hook)

        # Run both pipelines
        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook = bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        # Verify that all is well
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)

        # Reset to a date well before the two days
        reset_ts = arrow.get("2015-07-24").timestamp
        epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

        # Data should be completely deleted
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.assertEqual(api_result, [])

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.assertEqual(api_result, [])

        # Re-running the pipeline again
        etc.runIntakePipeline(self.testUUID)
        
        # Should reconstruct everything
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)
    def testAug10MultiSyncEndNotDetected(self):
        # Re-run, but with multiple calls to sync data
        # This tests the effect of online versus offline analysis and segmentation with potentially partial data

        dataFile = "emission/tests/data/real_examples/shankari_2016-08-10"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 9})
        end_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 10})
        cacheKey = "diary/trips-2016-08-10"
        with open(
                "emission/tests/data/real_examples/shankari_2016-08-910.ground_truth"
        ) as gtf:
            ground_truth = json.load(gtf, object_hook=bju.object_hook)

        logging.info("Before loading, timeseries db size = %s" %
                     edb.get_timeseries_db().estimated_document_count())
        with open(dataFile) as df:
            all_entries = json.load(df, object_hook=bju.object_hook)
        ts_1030 = arrow.get("2016-08-10T10:30:00-07:00").timestamp
        logging.debug("ts_1030 = %s, converted back = %s" %
                      (ts_1030, arrow.get(ts_1030).to("America/Los_Angeles")))
        before_1030_entries = [
            e for e in all_entries
            if ad.AttrDict(e).metadata.write_ts <= ts_1030
        ]
        after_1030_entries = [
            e for e in all_entries
            if ad.AttrDict(e).metadata.write_ts > ts_1030
        ]

        # First load all data from the 9th. Otherwise, the missed trip is the first trip,
        # and we don't set the last_ts_processed
        # See the code around "logging.debug("len(segmentation_points) == 0, early return")"
        etc.setupRealExample(
            self, "emission/tests/data/real_examples/shankari_2016-08-09")

        # Sync at 10:30 to capture all the points on the trip *to* the optometrist
        # Skip the last few points to ensure that the trip end is skipped
        self.entries = before_1030_entries[0:-2]
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Then sync after 10:30
        self.entries = after_1030_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)
        self.persistGroundTruthIfNeeded(api_result, dataFile, start_ld,
                                        cacheKey)

        # Although we process the day's data in two batches, we should get the same result
        self.compare_approx_result(ad.AttrDict({
            'result': api_result
        }).result,
                                   ad.AttrDict(ground_truth).data,
                                   time_fuzz=60,
                                   distance_fuzz=100)
    def setUp(self):
        etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
        self.androidUUID = self.testUUID

        self.testUUID = uuid.UUID("c76a0487-7e5a-3b17-a449-47be666b36f6")
        self.entries = json.load(open("emission/tests/data/real_examples/iphone_2015-11-06"), object_hook = bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        self.iosUUID = self.testUUID
        eaicf.filter_accuracy(self.iosUUID)
        logging.debug("androidUUID = %s, iosUUID = %s" % (self.androidUUID, self.iosUUID))
    def setUp(self):
        etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
        self.androidUUID = self.testUUID
        eaicf.filter_accuracy(self.androidUUID)

        self.testUUID = uuid.UUID("c76a0487-7e5a-3b17-a449-47be666b36f6")
        self.entries = json.load(open("emission/tests/data/real_examples/iphone_2015-11-06"), object_hook = bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        self.iosUUID = self.testUUID
        eaicf.filter_accuracy(self.iosUUID)
    def testOct07MultiSyncSpuriousEndDetected(self):
        # Re-run, but with multiple calls to sync data
        # This tests the effect of online versus offline analysis and segmentation with potentially partial data

        dataFile = "emission/tests/data/real_examples/issue_436_assertion_error"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 10, 'day': 0o7})
        end_ld = ecwl.LocalDate({'year': 2016, 'month': 10, 'day': 0o7})
        cacheKey = "diary/trips-2016-10-07"
        with open(dataFile + ".ground_truth") as gtf:
            ground_truth = json.load(gtf, object_hook=bju.object_hook)

        logging.info("Before loading, timeseries db size = %s" %
                     edb.get_timeseries_db().estimated_document_count())
        with open(dataFile) as df:
            all_entries = json.load(df, object_hook=bju.object_hook)
        # 18:01 because the transition was at 2016-02-22T18:00:09.623404-08:00, so right after
        # 18:00
        ts_1800 = arrow.get("2016-10-07T18:33:11-07:00").timestamp
        logging.debug("ts_1800 = %s, converted back = %s" %
                      (ts_1800, arrow.get(ts_1800).to("America/Los_Angeles")))
        before_1800_entries = [
            e for e in all_entries
            if ad.AttrDict(e).metadata.write_ts <= ts_1800
        ]
        after_1800_entries = [
            e for e in all_entries
            if ad.AttrDict(e).metadata.write_ts > ts_1800
        ]

        # Sync at 18:00 to capture all the points on the trip *to* the optometrist
        # Skip the last few points to ensure that the trip end is skipped
        etc.createAndFillUUID(self)
        self.entries = before_1800_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Then sync after 18:00
        self.entries = after_1800_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)
        self.persistGroundTruthIfNeeded(api_result, dataFile, start_ld,
                                        cacheKey)

        # Although we process the day's data in two batches, we should get the same result
        self.compare_approx_result(ad.AttrDict({
            'result': api_result
        }).result,
                                   ad.AttrDict(ground_truth).data,
                                   time_fuzz=60,
                                   distance_fuzz=100)
示例#11
0
    def setUp(self):
        self.analysis_conf_path = \
            etc.set_analysis_config("intake.cleaning.filter_accuracy.enable", True)

        etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
        self.androidUUID = self.testUUID
        eaicf.filter_accuracy(self.androidUUID)

        self.testUUID = uuid.UUID("c76a0487-7e5a-3b17-a449-47be666b36f6")
        self.entries = json.load(open("emission/tests/data/real_examples/iphone_2015-11-06"), object_hook = bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        self.iosUUID = self.testUUID
        eaicf.filter_accuracy(self.iosUUID)
    def setUp(self):
        self.analysis_conf_path = \
            etc.set_analysis_config("intake.cleaning.filter_accuracy.enable", True)

        etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
        self.androidUUID = self.testUUID

        self.testUUID = uuid.UUID("c76a0487-7e5a-3b17-a449-47be666b36f6")
        self.entries = json.load(open("emission/tests/data/real_examples/iphone_2015-11-06"), object_hook = bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        self.iosUUID = self.testUUID
        eaicf.filter_accuracy(self.iosUUID)
        logging.debug("androidUUID = %s, iosUUID = %s" % (self.androidUUID, self.iosUUID))
示例#13
0
    def testResetToFuture(self):
        """
        - Load data for both days
        - Run pipelines
        - Reset to a date after the two
        - Verify that all is well
        - Re-run pipelines and ensure that there are no errors
        """
        # Load all data
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        ground_truth_1 = json.load(open(dataFile_1 + ".ground_truth"),
                                   object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2 + ".ground_truth"),
                                   object_hook=bju.object_hook)

        # Run both pipelines
        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        # Reset to a date well after the two days
        reset_ts = arrow.get("2017-07-24").timestamp
        epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

        # Data should be untouched because of early return
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1,
                                            start_ld_1)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_2).data)

        # Re-running the pipeline again should not affect anything
        etc.runIntakePipeline(self.testUUID)
    def testFeb22MultiSyncEndNotDetected(self):
        # Re-run, but with multiple calls to sync data
        # This tests the effect of online versus offline analysis and segmentation with potentially partial data

        dataFile = "emission/tests/data/real_examples/iphone_2016-02-22"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 2, 'day': 22})
        end_ld = ecwl.LocalDate({'year': 2016, 'month': 2, 'day': 22})
        cacheKey = "diary/trips-2016-02-22"
        ground_truth = json.load(open(dataFile + ".ground_truth"),
                                 object_hook=bju.object_hook)

        logging.info("Before loading, timeseries db size = %s" %
                     edb.get_timeseries_db().count())
        all_entries = json.load(open(dataFile), object_hook=bju.object_hook)
        # 18:01 because the transition was at 2016-02-22T18:00:09.623404-08:00, so right after
        # 18:00
        ts_1800 = arrow.get("2016-02-22T18:00:30-08:00").timestamp
        logging.debug("ts_1800 = %s, converted back = %s" %
                      (ts_1800, arrow.get(ts_1800).to("America/Los_Angeles")))
        before_1800_entries = [
            e for e in all_entries
            if ad.AttrDict(e).metadata.write_ts <= ts_1800
        ]
        after_1800_entries = [
            e for e in all_entries
            if ad.AttrDict(e).metadata.write_ts > ts_1800
        ]

        # Sync at 18:00 to capture all the points on the trip *to* the optometrist
        # Skip the last few points to ensure that the trip end is skipped
        import uuid
        self.testUUID = uuid.uuid4()
        self.entries = before_1800_entries[0:-2]
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Then sync after 18:00
        self.entries = after_1800_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Although we process the day's data in two batches, we should get the same result
        self.compare_approx_result(ad.AttrDict({
            'result': api_result
        }).result,
                                   ad.AttrDict(ground_truth).data,
                                   time_fuzz=60,
                                   distance_fuzz=100)
    def testResetToFuture(self):
        """
        - Load data for both days
        - Run pipelines
        - Reset to a date after the two
        - Verify that all is well
        - Re-run pipelines and ensure that there are no errors
        """
        # Load all data
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        ground_truth_1 = json.load(open(dataFile_1+".ground_truth"), object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2+".ground_truth"), object_hook=bju.object_hook)

        # Run both pipelines
        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook = bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        # Reset to a date well after the two days
        reset_ts = arrow.get("2017-07-24").timestamp
        epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

        # Data should be untouched because of early return
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)

        # Re-running the pipeline again should not affect anything
        etc.runIntakePipeline(self.testUUID)
    def testAug10MultiSyncEndDetected(self):
        # Re-run, but with multiple calls to sync data
        # This tests the effect of online versus offline analysis and segmentation with potentially partial data

        dataFile = "emission/tests/data/real_examples/shankari_2016-08-10"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 9})
        end_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 10})
        cacheKey = "diary/trips-2016-08-10"
        ground_truth = json.load(open("emission/tests/data/real_examples/shankari_2016-08-910.ground_truth"),
                                 object_hook=bju.object_hook)

        logging.info("Before loading, timeseries db size = %s" % edb.get_timeseries_db().count())
        all_entries = None
        with open(dataFile) as secondfp:
            all_entries = json.load(secondfp, object_hook = bju.object_hook)
        ts_1030 = arrow.get("2016-08-10T10:30:00-07:00").timestamp
        logging.debug("ts_1030 = %s, converted back = %s" % (ts_1030, arrow.get(ts_1030).to("America/Los_Angeles")))
        before_1030_entries = [e for e in all_entries if ad.AttrDict(e).metadata.write_ts <= ts_1030]
        after_1030_entries = [e for e in all_entries if ad.AttrDict(e).metadata.write_ts > ts_1030]

        # First load all data from the 9th. Otherwise, the missed trip is the first trip,
        # and we don't set the last_ts_processed
        # See the code around "logging.debug("len(segmentation_points) == 0, early return")"
        etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2016-08-09")

        # Sync at 10:30 to capture all the points on the trip *to* the optometrist
        self.entries = before_1030_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Then sync after 10:30
        self.entries = after_1030_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Although we process the day's data in two batches, we should get the same result
        self.compare_approx_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth).data, time_fuzz=60, distance_fuzz=100)
示例#17
0
    def checkConfirmedTripsAndSections(self, dataFile, ld, preload=False):
        with open(dataFile + ".ground_truth") as gfp:
            ground_truth = json.load(gfp, object_hook=bju.object_hook)

        etc.setupRealExample(self, dataFile)
        if (preload):
            self.entries = json.load(open(dataFile + ".user_inputs"),
                                     object_hook=bju.object_hook)
            etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        if (not preload):
            self.entries = json.load(open(dataFile + ".user_inputs"),
                                     object_hook=bju.object_hook)
            etc.setupRealExampleWithEntries(self)
            etc.runIntakePipeline(self.testUUID)
        ts = esta.TimeSeries.get_time_series(self.testUUID)
        confirmed_trips = list(
            ts.find_entries(["analysis/confirmed_trip"], None))
        with open(dataFile + ".expected_confirmed_trips") as dect:
            expected_confirmed_trips = json.load(dect,
                                                 object_hook=bju.object_hook)
            self.compare_trip_result(confirmed_trips, expected_confirmed_trips)
    def testOct07MultiSyncSpuriousEndDetected(self):
        # Re-run, but with multiple calls to sync data
        # This tests the effect of online versus offline analysis and segmentation with potentially partial data

        dataFile = "emission/tests/data/real_examples/issue_436_assertion_error"
        start_ld = ecwl.LocalDate({'year': 2016, 'month': 10, 'day': 07})
        end_ld = ecwl.LocalDate({'year': 2016, 'month': 10, 'day': 07})
        cacheKey = "diary/trips-2016-10-07"
        ground_truth = json.load(open(dataFile+".ground_truth"), object_hook=bju.object_hook)

        logging.info("Before loading, timeseries db size = %s" % edb.get_timeseries_db().count())
        all_entries = json.load(open(dataFile), object_hook = bju.object_hook)
        # 18:01 because the transition was at 2016-02-22T18:00:09.623404-08:00, so right after
        # 18:00
        ts_1800 = arrow.get("2016-10-07T18:33:11-07:00").timestamp
        logging.debug("ts_1800 = %s, converted back = %s" % (ts_1800, arrow.get(ts_1800).to("America/Los_Angeles")))
        before_1800_entries = [e for e in all_entries if ad.AttrDict(e).metadata.write_ts <= ts_1800]
        after_1800_entries = [e for e in all_entries if ad.AttrDict(e).metadata.write_ts > ts_1800]

        # Sync at 18:00 to capture all the points on the trip *to* the optometrist
        # Skip the last few points to ensure that the trip end is skipped
        import uuid
        self.testUUID = uuid.uuid4()
        self.entries = before_1800_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Then sync after 18:00
        self.entries = after_1800_entries
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

        # Although we process the day's data in two batches, we should get the same result
        self.compare_approx_result(ad.AttrDict({'result': api_result}).result,
                                   ad.AttrDict(ground_truth).data, time_fuzz=60, distance_fuzz=100)
    def testResetToTsInMiddleOfPlace(self):
        """
        - Load data for both days
        - Run pipelines
        - Verify that all is well
        - Reset to a date between the two
        - Verify that analysis data for the first day is unchanged
        - Verify that analysis data for the second day does not exist
        - Re-run pipelines
        - Verify that all is well
        """

        # Load all data
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        ground_truth_1 = json.load(open(dataFile_1+".ground_truth"), object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2+".ground_truth"), object_hook=bju.object_hook)

        # Run both pipelines
        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook = bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        # Check results: so far, so good
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)

        # Reset pipeline to july 23.
        # Note that this is actually 22nd 16:00 PDT, so this is partway
        # through the 22nd
        reset_ts = arrow.get("2016-07-23").timestamp
        epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

        # First day is unchanged, except that the last place doesn't have
        # exit data.
        # TODO: Modify ground truth to capture this change
        # Until then, we know that this will fail
#        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
#        self.compare_result(ad.AttrDict({'result': api_result}).result,
#                            ad.AttrDict(ground_truth_1).data)

        # Second day does not exist
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        logging.debug(json.dumps(api_result, indent=4, default=bju.default))
        self.assertEqual(api_result, [])

        # Re-run the pipeline again
        etc.runIntakePipeline(self.testUUID)

        # Should be back to ground truth
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)
示例#20
0
    def testResetToTsInMiddleOfPlace(self):
        """
        - Load data for both days
        - Run pipelines
        - Verify that all is well
        - Reset to a date between the two
        - Verify that analysis data for the first day is unchanged
        - Verify that analysis data for the second day does not exist
        - Re-run pipelines
        - Verify that all is well
        """

        # Load all data
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        ground_truth_1 = json.load(open(dataFile_1 + ".ground_truth"),
                                   object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2 + ".ground_truth"),
                                   object_hook=bju.object_hook)

        # Run both pipelines
        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        # Check results: so far, so good
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1,
                                            start_ld_1)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_2).data)

        # Reset pipeline to july 23.
        # Note that this is actually 22nd 16:00 PDT, so this is partway
        # through the 22nd
        reset_ts = arrow.get("2016-07-23").timestamp
        epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

        # First day is unchanged, except that the last place doesn't have
        # exit data.
        # TODO: Modify ground truth to capture this change
        # Until then, we know that this will fail
        #        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        #        self.compare_result(ad.AttrDict({'result': api_result}).result,
        #                            ad.AttrDict(ground_truth_1).data)

        # Second day does not exist
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        logging.debug(json.dumps(api_result, indent=4, default=bju.default))
        self.assertEqual(api_result, [])

        # Re-run the pipeline again
        etc.runIntakePipeline(self.testUUID)

        # Should be back to ground truth
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1,
                                            start_ld_1)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_2).data)
    def testResetToTsInMiddleOfTrip(self):
        """
        - Load data for both days
        - Run pipelines
        - Verify that all is well
        - Reset to a date between the two
        - Verify that analysis data for the first day is unchanged
        - Verify that analysis data for the second day does not exist
        - Re-run pipelines
        - Verify that all is well
        """

        # Load all data
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        ground_truth_1 = json.load(open(dataFile_1+".ground_truth"), object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2+".ground_truth"), object_hook=bju.object_hook)

        # Run both pipelines
        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook = bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        # Check results: so far, so good
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)

        # Reset pipeline to july 24.
        # Note that this is actually 23nd 16:00 PDT
        # This will reset in the middle of the untracked time, which is
        # technically a trip, and will allow us to test the trip resetting
        # code
        reset_ts = arrow.get("2016-07-24").timestamp
        epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

        # Second day does not exist
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        logging.debug(json.dumps(api_result, indent=4, default=bju.default))
        self.assertEqual(api_result, [])

        # Re-run the pipeline again
        etc.runIntakePipeline(self.testUUID)

        # Should be back to ground truth
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)
示例#22
0
    def testResetToTsInMiddleOfTrip(self):
        """
        - Load data for both days
        - Run pipelines
        - Verify that all is well
        - Reset to a date between the two
        - Verify that analysis data for the first day is unchanged
        - Verify that analysis data for the second day does not exist
        - Re-run pipelines
        - Verify that all is well
        """

        # Load all data
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        ground_truth_1 = json.load(open(dataFile_1 + ".ground_truth"),
                                   object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2 + ".ground_truth"),
                                   object_hook=bju.object_hook)

        # Run both pipelines
        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        # Check results: so far, so good
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1,
                                            start_ld_1)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_2).data)

        # Reset pipeline to july 24.
        # Note that this is actually 23nd 16:00 PDT
        # This will reset in the middle of the untracked time, which is
        # technically a trip, and will allow us to test the trip resetting
        # code
        reset_ts = arrow.get("2016-07-24").timestamp
        epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

        # Second day does not exist
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        logging.debug(json.dumps(api_result, indent=4, default=bju.default))
        self.assertEqual(api_result, [])

        # Re-run the pipeline again
        etc.runIntakePipeline(self.testUUID)

        # Should be back to ground truth
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1,
                                            start_ld_1)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_2).data)