def test_load_whitelist(s3_fixture):
    """Check that load() leaves both output files in the dump bucket.

    Runs transform() on the fixture data, calls load() and then lists the
    bucket under AMO_DUMP_PREFIX, asserting that the filtered whitelist key
    and the featured addon key are both present.
    """
    _conn, data = s3_fixture
    amo = taar_amowhitelist

    etl = amo.AMOTransformer(
        amo.AMO_DUMP_BUCKET,
        amo.AMO_DUMP_PREFIX,
        amo.AMO_DUMP_FILENAME,
        amo.MIN_RATING,
        amo.MIN_AGE,
    )
    etl.transform(data)
    etl.load()

    bucket = boto3.resource('s3', region_name='us-west-2').Bucket(
        amo.AMO_DUMP_BUCKET
    )
    # One listing is enough: both checks below look at the same prefix.
    stored_keys = [
        obj.key for obj in bucket.objects.filter(Prefix=amo.AMO_DUMP_PREFIX)
    ]

    # The whitelist file must have been written.
    whitelist_key = '{}{}'.format(
        amo.AMO_DUMP_PREFIX, amo.FILTERED_AMO_FILENAME
    )
    assert whitelist_key in stored_keys

    # The featured addon file must have been written as well.
    featured_key = '{}{}'.format(amo.AMO_DUMP_PREFIX, amo.FEATURED_FILENAME)
    assert featured_key in stored_keys
def test_transform_whitelist(s3_fixture):
    """Verify the records exposed by get_whitelist() after transform().

    Exactly one record is expected for the fixture data. Each record must
    be flagged as a webextension, have an average rating of at least
    MIN_RATING, have a `first_create_date` more than MIN_AGE days in the
    past, carry an `is_featured` key and report a platform that is listed
    in VALID_PLATFORMS.
    """
    _conn, data = s3_fixture
    amo = taar_amowhitelist

    etl = amo.AMOTransformer(
        amo.AMO_DUMP_BUCKET,
        amo.AMO_DUMP_PREFIX,
        amo.AMO_DUMP_FILENAME,
        amo.MIN_RATING,
        amo.MIN_AGE,
    )
    etl.transform(data)

    whitelist = etl.get_whitelist()
    assert len(whitelist) == 1

    # Dates are compared as naive datetimes: tzinfo is dropped on both sides.
    now = datetime.datetime.today().replace(tzinfo=None)
    min_age = datetime.timedelta(days=amo.MIN_AGE)

    for addon in whitelist.values():
        first_file = addon['current_version']['files'][0]

        assert first_file['is_webextension']
        assert addon['ratings']['average'] >= amo.MIN_RATING

        created = parse(addon['first_create_date']).replace(tzinfo=None)
        assert created + min_age < now

        assert 'is_featured' in addon

        # Verify that the platform data is in the transform output
        assert first_file['platform'] in VALID_PLATFORMS
def test_transform_featuredlist(s3_fixture):
    """Verify the records exposed by get_featuredlist() after transform().

    Three records are expected for the fixture data, and every one of them
    must have a truthy `is_featured` value.
    """
    _conn, data = s3_fixture
    amo = taar_amowhitelist

    etl = amo.AMOTransformer(
        amo.AMO_DUMP_BUCKET,
        amo.AMO_DUMP_PREFIX,
        amo.AMO_DUMP_FILENAME,
        amo.MIN_RATING,
        amo.MIN_AGE,
    )
    etl.transform(data)
    featured = etl.get_featuredlist()

    # SAMPLE_DATA is expected to hold 4 records, exactly one of which is
    # not marked as featured, leaving 3.
    assert len(featured) == 3

    for record in featured.values():
        assert record["is_featured"]
def test_extract(s3_fixture):
    """Check that extract() returns data equal to SAMPLE_DATA.

    The value of `s3_fixture` is not used in the body.
    NOTE(review): presumably the fixture seeds the mocked bucket that
    extract() reads from -- confirm against the fixture definition.
    """
    amo = taar_amowhitelist
    transformer = amo.AMOTransformer(
        amo.AMO_DUMP_BUCKET,
        amo.AMO_DUMP_PREFIX,
        amo.AMO_DUMP_FILENAME,
        amo.MIN_RATING,
        amo.MIN_AGE,
    )

    assert transformer.extract() == SAMPLE_DATA
def test_load(s3_fixture):
    """Check that load() writes the filtered AMO file to the dump bucket.

    Calls load() with EXPECTED_FINAL_JDATA and asserts that the key built
    from AMO_DUMP_PREFIX and FILTERED_AMO_FILENAME is listed in the bucket.

    NOTE(review): this test passes data to load(), whereas
    test_load_whitelist in this file calls load() with no arguments --
    confirm that AMOTransformer.load supports both call forms.
    """
    _conn, _data = s3_fixture
    amo = taar_amowhitelist

    transformer = amo.AMOTransformer(
        amo.AMO_DUMP_BUCKET,
        amo.AMO_DUMP_PREFIX,
        amo.AMO_DUMP_FILENAME,
        amo.MIN_RATING,
        amo.MIN_AGE,
    )
    transformer.load(EXPECTED_FINAL_JDATA)

    resource = boto3.resource('s3', region_name='us-west-2')
    listing = resource.Bucket(amo.AMO_DUMP_BUCKET).objects.filter(
        Prefix=amo.AMO_DUMP_PREFIX
    )
    stored_keys = [entry.key for entry in listing]

    # Check that our file is there.
    expected_key = '{}{}'.format(
        amo.AMO_DUMP_PREFIX, amo.FILTERED_AMO_FILENAME
    )
    assert expected_key in stored_keys
def test_transform(s3_fixture):
    """Verify the mapping returned directly by transform().

    Two records are expected for the fixture data. Each must have an
    average rating of at least MIN_RATING and a `first_create_date` more
    than MIN_AGE days in the past.

    NOTE(review): this test uses the return value of transform() and
    expects 2 records, whereas test_transform_whitelist in this file reads
    the result through get_whitelist() and expects 1 -- confirm which
    AMOTransformer behaviour is current.
    """
    _conn, data = s3_fixture
    amo = taar_amowhitelist

    transformer = amo.AMOTransformer(
        amo.AMO_DUMP_BUCKET,
        amo.AMO_DUMP_PREFIX,
        amo.AMO_DUMP_FILENAME,
        amo.MIN_RATING,
        amo.MIN_AGE,
    )
    transformed = transformer.transform(data)
    assert len(transformed) == 2

    # Dates are compared as naive datetimes: tzinfo is dropped on both sides.
    now = datetime.datetime.today().replace(tzinfo=None)
    min_age = datetime.timedelta(days=amo.MIN_AGE)

    for addon in transformed.values():
        assert addon['ratings']['average'] >= amo.MIN_RATING

        created = parse(addon['first_create_date']).replace(tzinfo=None)
        assert created + min_age < now