def test_merge_differing_capitalization(self):
    # this tests #19: two scraped spellings of one brand that differ
    # only in capitalization must end up under the same brand name
    scraped_brands = [
        {'brand': 'CardScan',
         'company': 'Newell Rubbermaid',
         'scraper_id': 'sr.campaign.hrc'},
        {'brand': 'Cardscan',
         'company': 'Newell Rubbermaid',
         'scraper_id': 'sr.campaign.hrc'},
    ]
    insert_rows(self.scratch_db, 'brand', scraped_brands)

    company_mapping = {
        'company': 'Newell Rubbermaid',
        'scraper_company': 'Newell Rubbermaid',
        'scraper_id': 'sr.campaign.hrc',
    }
    insert_row(self.output_db, 'scraper_company_map', company_mapping)

    build_scraper_brand_map_table(self.output_db, self.scratch_db)

    # both scraper spellings are kept, each mapped to brand 'CardScan'
    expected_map = [
        {'brand': 'CardScan',
         'company': 'Newell Rubbermaid',
         'scraper_brand': 'CardScan',
         'scraper_company': 'Newell Rubbermaid',
         'scraper_id': 'sr.campaign.hrc'},
        {'brand': 'CardScan',
         'company': 'Newell Rubbermaid',
         'scraper_brand': 'Cardscan',
         'scraper_company': 'Newell Rubbermaid',
         'scraper_id': 'sr.campaign.hrc'},
    ]
    self.assertEqual(
        select_all(self.output_db, 'scraper_brand_map'),
        expected_map)
def test_dont_push_brand_to_unrelated_subsidiary(self):
    # tests #59: the parent company has a subsidiary whose name does
    # not match the brand, so the brand must stay with the parent
    scraped_brands = [
        {'brand': 'Dove',
         'company': 'Unilever',
         'scraper_id': 'campaign.hrc'},
    ]
    insert_rows(self.scratch_db, 'brand', scraped_brands)

    company_mappings = [
        {'company': 'Unilever',
         'scraper_company': 'Unilever',
         'scraper_id': 'campaign.hrc'},
    ]
    insert_rows(self.output_db, 'scraper_company_map', company_mappings)

    subsidiaries = [
        {'company': 'Unilever',
         'company_depth': 0,
         'subsidiary': "Ben & Jerry's",
         'subsidiary_depth': 1},
    ]
    insert_rows(self.output_db, 'subsidiary', subsidiaries)

    build_scraper_brand_map_table(self.output_db, self.scratch_db)

    # exactly one row, still attributed to Unilever
    expected_map = [
        {'brand': 'Dove',
         'company': 'Unilever',
         'scraper_brand': 'Dove',
         'scraper_company': 'Unilever',
         'scraper_id': 'campaign.hrc'},
    ]
    self.assertEqual(
        select_all(self.output_db, 'scraper_brand_map'),
        expected_map)
def test_match_brand_to_subsidiary_name(self):
    # a brand scraped under the parent company, whose name equals one
    # of that parent's subsidiaries, is expected to be filed under the
    # subsidiary
    scraped_brands = [
        {'brand': 'Puma',
         'company': 'Kering SA',
         'scraper_id': 'campaign.rankabrand'},
    ]
    insert_rows(self.scratch_db, 'brand', scraped_brands)

    company_mappings = [
        {'company': 'Kering',
         'scraper_company': 'Kering SA',
         'scraper_id': 'campaign.rankabrand'},
    ]
    insert_rows(self.output_db, 'scraper_company_map', company_mappings)

    subsidiaries = [
        {'company': 'Kering',
         'company_depth': 0,
         'subsidiary': 'Puma',
         'subsidiary_depth': 1},
    ]
    insert_rows(self.output_db, 'subsidiary', subsidiaries)

    build_scraper_brand_map_table(self.output_db, self.scratch_db)

    # company is the subsidiary 'Puma'; scraper_company is unchanged
    expected_map = [
        {'brand': 'Puma',
         'company': 'Puma',
         'scraper_brand': 'Puma',
         'scraper_company': 'Kering SA',
         'scraper_id': 'campaign.rankabrand'},
    ]
    self.assertEqual(
        select_all(self.output_db, 'scraper_brand_map'),
        expected_map)
def test_dump_empty_brand(self):
    # a brand consisting solely of a trademark sign should yield no
    # rows at all (presumably it normalizes to an empty name -- the
    # normalization itself is not visible from this test)
    scraped_brands = [
        {'brand': '™', 'company': 'Voidcorp', 'scraper_id': 's'},
    ]
    insert_rows(self.scratch_db, 'brand', scraped_brands)

    company_mappings = [
        {'company': 'Voidcorp',
         'scraper_company': 'Voidcorp',
         'scraper_id': 's'},
    ]
    insert_rows(self.output_db, 'scraper_company_map', company_mappings)

    build_scraper_brand_map_table(self.output_db, self.scratch_db)

    brand_map = select_all(self.output_db, 'scraper_brand_map')
    self.assertEqual(brand_map, [])
def test_prefer_subsidiary_for_brand(self):
    # tests #16: one scraper lists the brand under the subsidiary, the
    # other under the parent; both rows should resolve to the
    # subsidiary as the company
    scraped_brands = [
        {'brand': 'Puma',
         'company': 'Puma',
         'scraper_id': 'campaign.btb_fashion'},
        {'brand': 'Puma',
         'company': 'Kering SA',
         'scraper_id': 'campaign.rankabrand'},
    ]
    insert_rows(self.scratch_db, 'brand', scraped_brands)

    company_mappings = [
        {'company': 'Puma',
         'scraper_company': 'Puma',
         'scraper_id': 'campaign.btb_fashion'},
        {'company': 'Kering',
         'scraper_company': 'Kering SA',
         'scraper_id': 'campaign.rankabrand'},
    ]
    insert_rows(self.output_db, 'scraper_company_map', company_mappings)

    subsidiaries = [
        {'company': 'Kering',
         'company_depth': 0,
         'subsidiary': 'Puma',
         'subsidiary_depth': 1},
    ]
    insert_rows(self.output_db, 'subsidiary', subsidiaries)

    build_scraper_brand_map_table(self.output_db, self.scratch_db)

    # company is 'Puma' in both rows, regardless of scraper_company
    expected_map = [
        {'brand': 'Puma',
         'company': 'Puma',
         'scraper_brand': 'Puma',
         'scraper_company': 'Kering SA',
         'scraper_id': 'campaign.rankabrand'},
        {'brand': 'Puma',
         'company': 'Puma',
         'scraper_brand': 'Puma',
         'scraper_company': 'Puma',
         'scraper_id': 'campaign.btb_fashion'},
    ]
    self.assertEqual(
        select_all(self.output_db, 'scraper_brand_map'),
        expected_map)
def test_match_canonical_company_name_only(self):
    # tests #40: the brand 'Asus' is expected to take the canonical
    # company spelling 'ASUS', not either scraper's company spelling
    scraped_brands = [
        {'brand': 'Asus',
         'company': 'Asus',
         'scraper_id': 'campaign.btb_electronics'},
        {'brand': 'Asus',
         'company': 'ASUSTeK Computer Incorporated',
         'scraper_id': 'campaign.rankabrand'},
    ]
    insert_rows(self.scratch_db, 'brand', scraped_brands)

    # we picked ASUS as the canonical name based on company_name
    company_mappings = [
        {'company': 'ASUS',
         'scraper_id': 'campaign.btb_electronics',
         'scraper_company': 'Asus'},
        {'company': 'ASUS',
         'scraper_id': 'campaign.rankabrand',
         'scraper_company': 'ASUSTeK Computer Incorporated'},
    ]
    insert_rows(self.output_db, 'scraper_company_map', company_mappings)

    build_scraper_brand_map_table(self.output_db, self.scratch_db)

    # brand is 'ASUS' in both rows; scraper_brand keeps 'Asus'
    expected_map = [
        {'brand': 'ASUS',
         'company': 'ASUS',
         'scraper_brand': 'Asus',
         'scraper_company': 'ASUSTeK Computer Incorporated',
         'scraper_id': 'campaign.rankabrand'},
        {'brand': 'ASUS',
         'company': 'ASUS',
         'scraper_brand': 'Asus',
         'scraper_company': 'Asus',
         'scraper_id': 'campaign.btb_electronics'},
    ]
    self.assertEqual(
        select_all(self.output_db, 'scraper_brand_map'),
        expected_map)
def test_merge_hyphens(self):
    # tests #31: spellings that differ only by a hyphen versus a space
    # should be merged under a single brand name
    scraped_brands = [
        {'brand': 'Liquid Plumr',
         'company': 'Clorox',
         'scraper_id': 'company.clorox'},
        {'brand': 'Liquid-Plumr',
         'company': 'Clorox',
         'scraper_id': 'campaign.hrc'},
    ]
    insert_rows(self.scratch_db, 'brand', scraped_brands)

    company_mappings = [
        {'company': 'Clorox',
         'scraper_id': 'company.clorox',
         'scraper_company': 'Clorox'},
        {'company': 'Clorox',
         'scraper_id': 'campaign.hrc',
         'scraper_company': 'Clorox'},
    ]
    insert_rows(self.output_db, 'scraper_company_map', company_mappings)

    build_scraper_brand_map_table(self.output_db, self.scratch_db)

    # both rows carry the hyphenated brand; scraper_brand is untouched
    expected_map = [
        {'brand': 'Liquid-Plumr',
         'company': 'Clorox',
         'scraper_brand': 'Liquid Plumr',
         'scraper_company': 'Clorox',
         'scraper_id': 'company.clorox'},
        {'brand': 'Liquid-Plumr',
         'company': 'Clorox',
         'scraper_brand': 'Liquid-Plumr',
         'scraper_company': 'Clorox',
         'scraper_id': 'campaign.hrc'},
    ]
    self.assertEqual(
        select_all(self.output_db, 'scraper_brand_map'),
        expected_map)