def testResetToPast(self):
    """
    - Load data for both days
    - Run pipelines
    - Verify that all is well
    - Reset to a date before both
    - Verify that analysis data for both days is removed
    - Re-run pipelines
    - Verify that all is well
    """
    # Load all data
    dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
    dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
    start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
    start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
    # Use context managers so the fixture files are closed promptly;
    # the previous bare json.load(open(...)) calls leaked the handles.
    with open(dataFile_1 + ".ground_truth") as gt_fp_1:
        ground_truth_1 = json.load(gt_fp_1, object_hook=bju.object_hook)
    with open(dataFile_2 + ".ground_truth") as gt_fp_2:
        ground_truth_2 = json.load(gt_fp_2, object_hook=bju.object_hook)

    # Run both pipelines
    etc.setupRealExample(self, dataFile_1)
    etc.runIntakePipeline(self.testUUID)
    with open(dataFile_2) as entry_fp:
        self.entries = json.load(entry_fp, object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    etc.runIntakePipeline(self.testUUID)

    # Verify that all is well
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)

    # Reset to a date well before the two days
    # NOTE(review): `.timestamp` as a property implies arrow < 1.0; newer
    # arrow versions need `.int_timestamp` / `.timestamp()` — confirm against
    # the pinned dependency version.
    reset_ts = arrow.get("2015-07-24").timestamp
    epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

    # Data should be completely deleted
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.assertEqual(api_result, [])
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.assertEqual(api_result, [])

    # Re-running the pipeline again
    etc.runIntakePipeline(self.testUUID)

    # Should reconstruct everything
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)
def testResetToFuture(self):
    """
    - Load data for both days
    - Run pipelines
    - Reset to a date after the two
    - Verify that all is well
    - Re-run pipelines and ensure that there are no errors
    """
    # NOTE(review): `testResetToFuture` is defined twice in this file; under
    # unittest the later definition shadows this one, so only one copy
    # actually runs.  The duplicate should be removed.
    # Load all data
    dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
    dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
    start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
    start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
    # Use context managers so the fixture files are closed promptly;
    # the previous bare json.load(open(...)) calls leaked the handles.
    with open(dataFile_1 + ".ground_truth") as gt_fp_1:
        ground_truth_1 = json.load(gt_fp_1, object_hook=bju.object_hook)
    with open(dataFile_2 + ".ground_truth") as gt_fp_2:
        ground_truth_2 = json.load(gt_fp_2, object_hook=bju.object_hook)

    # Run both pipelines
    etc.setupRealExample(self, dataFile_1)
    etc.runIntakePipeline(self.testUUID)
    with open(dataFile_2) as entry_fp:
        self.entries = json.load(entry_fp, object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    etc.runIntakePipeline(self.testUUID)

    # Reset to a date well after the two days
    reset_ts = arrow.get("2017-07-24").timestamp
    epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

    # Data should be untouched because of early return
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)

    # Re-running the pipeline again should not affect anything
    etc.runIntakePipeline(self.testUUID)
def testResetToFuture(self):
    """
    - Load data for both days
    - Run pipelines
    - Reset to a date after the two
    - Verify that all is well
    - Re-run pipelines and ensure that there are no errors
    """
    # NOTE(review): this is the second, duplicate definition of
    # `testResetToFuture` in this file; it silently shadows the earlier one,
    # so only this copy runs under unittest.  One of the two should be
    # removed.
    # Load all data
    dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
    dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
    start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
    start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
    # Use context managers so the fixture files are closed promptly;
    # the previous bare json.load(open(...)) calls leaked the handles.
    with open(dataFile_1 + ".ground_truth") as gt_fp_1:
        ground_truth_1 = json.load(gt_fp_1, object_hook=bju.object_hook)
    with open(dataFile_2 + ".ground_truth") as gt_fp_2:
        ground_truth_2 = json.load(gt_fp_2, object_hook=bju.object_hook)

    # Run both pipelines
    etc.setupRealExample(self, dataFile_1)
    etc.runIntakePipeline(self.testUUID)
    with open(dataFile_2) as entry_fp:
        self.entries = json.load(entry_fp, object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    etc.runIntakePipeline(self.testUUID)

    # Reset to a date well after the two days
    reset_ts = arrow.get("2017-07-24").timestamp
    epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

    # Data should be untouched because of early return
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)

    # Re-running the pipeline again should not affect anything
    etc.runIntakePipeline(self.testUUID)
def testResetToTsInMiddleOfTrip(self):
    """
    - Load data for both days
    - Run pipelines
    - Verify that all is well
    - Reset to a date between the two
    - Verify that analysis data for the first day is unchanged
    - Verify that analysis data for the second day does not exist
    - Re-run pipelines
    - Verify that all is well
    """
    # Load all data
    dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
    dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
    start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
    start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
    # Use context managers so the fixture files are closed promptly;
    # the previous bare json.load(open(...)) calls leaked the handles.
    with open(dataFile_1 + ".ground_truth") as gt_fp_1:
        ground_truth_1 = json.load(gt_fp_1, object_hook=bju.object_hook)
    with open(dataFile_2 + ".ground_truth") as gt_fp_2:
        ground_truth_2 = json.load(gt_fp_2, object_hook=bju.object_hook)

    # Run both pipelines
    etc.setupRealExample(self, dataFile_1)
    etc.runIntakePipeline(self.testUUID)
    with open(dataFile_2) as entry_fp:
        self.entries = json.load(entry_fp, object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    etc.runIntakePipeline(self.testUUID)

    # Check results: so far, so good
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)

    # Reset pipeline to july 24.
    # Note that this is actually the 23rd, 16:00 PDT
    # This will reset in the middle of the untracked time, which is
    # technically a trip, and will allow us to test the trip resetting
    # code
    reset_ts = arrow.get("2016-07-24").timestamp
    epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

    # Second day does not exist
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    logging.debug(json.dumps(api_result, indent=4, default=bju.default))
    self.assertEqual(api_result, [])

    # Re-run the pipeline again
    etc.runIntakePipeline(self.testUUID)

    # Should be back to ground truth
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)
def testResetToTsInMiddleOfPlace(self):
    """
    - Load data for both days
    - Run pipelines
    - Verify that all is well
    - Reset to a date between the two
    - Verify that analysis data for the first day is unchanged
    - Verify that analysis data for the second day does not exist
    - Re-run pipelines
    - Verify that all is well
    """
    # Load all data
    dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
    dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
    start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
    start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
    # Use context managers so the fixture files are closed promptly;
    # the previous bare json.load(open(...)) calls leaked the handles.
    with open(dataFile_1 + ".ground_truth") as gt_fp_1:
        ground_truth_1 = json.load(gt_fp_1, object_hook=bju.object_hook)
    with open(dataFile_2 + ".ground_truth") as gt_fp_2:
        ground_truth_2 = json.load(gt_fp_2, object_hook=bju.object_hook)

    # Run both pipelines
    etc.setupRealExample(self, dataFile_1)
    etc.runIntakePipeline(self.testUUID)
    with open(dataFile_2) as entry_fp:
        self.entries = json.load(entry_fp, object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    etc.runIntakePipeline(self.testUUID)

    # Check results: so far, so good
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)

    # Reset pipeline to july 23.
    # Note that this is actually 22nd 16:00 PDT, so this is partway
    # through the 22nd
    reset_ts = arrow.get("2016-07-23").timestamp
    epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

    # First day is unchanged, except that the last place doesn't have
    # exit data.
    # TODO: Modify ground truth to capture this change
    # Until then, we know that this will fail
    # api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    # self.compare_result(ad.AttrDict({'result': api_result}).result,
    #                     ad.AttrDict(ground_truth_1).data)

    # Second day does not exist
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    logging.debug(json.dumps(api_result, indent=4, default=bju.default))
    self.assertEqual(api_result, [])

    # Re-run the pipeline again
    etc.runIntakePipeline(self.testUUID)

    # Should be back to ground truth
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)
action="store_true", default=False, help="do everything except actually perform the operations") args = parser.parse_args() print args # Handle the first row in the table if args.date is None: if args.all: epr.reset_all_users_to_start(args.dry_run) else: user_list = _get_user_list(args) logging.info("received list with %s users" % user_list) logging.info("first few entries are %s" % user_list[0:5]) for user_id in user_list: logging.info("resetting user %s to start" % user_id) epr.reset_user_to_start(user_id, args.dry_run) else: # Handle the second row in the table day_dt = arrow.get(args.date, "YYYY-MM-DD") logging.debug("day_dt is %s" % day_dt) day_ts = day_dt.timestamp logging.debug("day_ts is %s" % day_ts) user_list = _get_user_list(args) logging.info("received list with %s users" % user_list) logging.info("first few entries are %s" % user_list[0:5]) for user_id in user_list: logging.info("resetting user %s to ts %s" % (user_id, day_ts)) epr.reset_user_to_ts(user_id, day_ts, args.dry_run)
# -n/--dry_run: log what would happen but skip the actual reset operations
parser.add_argument("-n", "--dry_run", action="store_true", default=False,
                    help="do everything except actually perform the operations")

args = parser.parse_args()
print(args)

# Handle the first row in the table: no date given, so reset the selected
# users (or, with --all, every user) back to the start of their timelines.
if args.date is None:
    if args.all:
        epr.reset_all_users_to_start(args.dry_run)
    else:
        user_list = _get_user_list(args)
        logging.info("received list with %s users" % user_list)
        logging.info("first few entries are %s" % user_list[0:5])
        for user_id in user_list:
            logging.info("resetting user %s to start" % user_id)
            epr.reset_user_to_start(user_id, args.dry_run)
else:
    # Handle the second row in the table: a date was given, so reset each
    # selected user to that date's timestamp.
    # NOTE(review): arrow's `.timestamp` as a property implies arrow < 1.0
    # (newer versions use `.int_timestamp` / `.timestamp()`) — confirm
    # against the pinned dependency version.
    day_dt = arrow.get(args.date, "YYYY-MM-DD")
    logging.debug("day_dt is %s" % day_dt)
    day_ts = day_dt.timestamp
    logging.debug("day_ts is %s" % day_ts)
    user_list = _get_user_list(args)
    logging.info("received list with %s users" % user_list)
    logging.info("first few entries are %s" % user_list[0:5])
    for user_id in user_list:
        logging.info("resetting user %s to ts %s" % (user_id, day_ts))
        epr.reset_user_to_ts(user_id, day_ts, args.dry_run)
def testResetToTsInMiddleOfTrip(self):
    """
    - Load data for both days
    - Run pipelines
    - Verify that all is well
    - Reset to a date between the two
    - Verify that analysis data for the first day is unchanged
    - Verify that analysis data for the second day does not exist
    - Re-run pipelines
    - Verify that all is well
    """
    # NOTE(review): `testResetToTsInMiddleOfTrip` is defined twice in this
    # file; this later definition shadows the earlier one under unittest,
    # so only one copy runs.  One of the two should be removed.
    # Load all data
    dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
    dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
    start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
    start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
    # Use context managers so the fixture files are closed promptly;
    # the previous bare json.load(open(...)) calls leaked the handles.
    with open(dataFile_1 + ".ground_truth") as gt_fp_1:
        ground_truth_1 = json.load(gt_fp_1, object_hook=bju.object_hook)
    with open(dataFile_2 + ".ground_truth") as gt_fp_2:
        ground_truth_2 = json.load(gt_fp_2, object_hook=bju.object_hook)

    # Run both pipelines
    etc.setupRealExample(self, dataFile_1)
    etc.runIntakePipeline(self.testUUID)
    with open(dataFile_2) as entry_fp:
        self.entries = json.load(entry_fp, object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    etc.runIntakePipeline(self.testUUID)

    # Check results: so far, so good
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)

    # Reset pipeline to july 24.
    # Note that this is actually the 23rd, 16:00 PDT
    # This will reset in the middle of the untracked time, which is
    # technically a trip, and will allow us to test the trip resetting
    # code
    reset_ts = arrow.get("2016-07-24").timestamp
    epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

    # Second day does not exist
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    logging.debug(json.dumps(api_result, indent=4, default=bju.default))
    self.assertEqual(api_result, [])

    # Re-run the pipeline again
    etc.runIntakePipeline(self.testUUID)

    # Should be back to ground truth
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)
def testResetToTsInMiddleOfPlace(self):
    """
    - Load data for both days
    - Run pipelines
    - Verify that all is well
    - Reset to a date between the two
    - Verify that analysis data for the first day is unchanged
    - Verify that analysis data for the second day does not exist
    - Re-run pipelines
    - Verify that all is well
    """
    # NOTE(review): `testResetToTsInMiddleOfPlace` is defined twice in this
    # file; this later definition shadows the earlier one under unittest,
    # so only one copy runs.  One of the two should be removed.
    # Load all data
    dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
    dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
    start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
    start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
    # Use context managers so the fixture files are closed promptly;
    # the previous bare json.load(open(...)) calls leaked the handles.
    with open(dataFile_1 + ".ground_truth") as gt_fp_1:
        ground_truth_1 = json.load(gt_fp_1, object_hook=bju.object_hook)
    with open(dataFile_2 + ".ground_truth") as gt_fp_2:
        ground_truth_2 = json.load(gt_fp_2, object_hook=bju.object_hook)

    # Run both pipelines
    etc.setupRealExample(self, dataFile_1)
    etc.runIntakePipeline(self.testUUID)
    with open(dataFile_2) as entry_fp:
        self.entries = json.load(entry_fp, object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    etc.runIntakePipeline(self.testUUID)

    # Check results: so far, so good
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)

    # Reset pipeline to july 23.
    # Note that this is actually 22nd 16:00 PDT, so this is partway
    # through the 22nd
    reset_ts = arrow.get("2016-07-23").timestamp
    epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

    # First day is unchanged, except that the last place doesn't have
    # exit data.
    # TODO: Modify ground truth to capture this change
    # Until then, we know that this will fail
    # api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    # self.compare_result(ad.AttrDict({'result': api_result}).result,
    #                     ad.AttrDict(ground_truth_1).data)

    # Second day does not exist
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    logging.debug(json.dumps(api_result, indent=4, default=bju.default))
    self.assertEqual(api_result, [])

    # Re-run the pipeline again
    etc.runIntakePipeline(self.testUUID)

    # Should be back to ground truth
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)