def get_domains_index(companies):
    """Build a lookup table from company identifiers to company names.

    Each company contributes three kinds of keys, all mapped to its
    ``company_name``:

    * every item of ``company['domains']``, used as-is;
    * every item of the optional ``company['aliases']``, passed through
      ``utils.normalize_company_name``;
    * the company name itself, passed through
      ``utils.normalize_company_name``.

    Keys written later overwrite earlier ones on collision.

    :param companies: iterable of dicts with ``domains`` and
        ``company_name`` keys and an optional ``aliases`` key
    :returns: dict mapping each key to the owning company's name
    """
    index = {}
    for entry in companies:
        # Domains are stored verbatim, without normalization.
        index.update(
            (domain, entry['company_name']) for domain in entry['domains'])

        # 'aliases' is optional; a missing key contributes nothing.
        for alias in entry.get('aliases', ()):
            alias_key = utils.normalize_company_name(alias)
            index[alias_key] = entry['company_name']

        name_key = utils.normalize_company_name(entry['company_name'])
        index[name_key] = entry['company_name']
    return index
def _store_companies(runtime_storage_inst, companies):
    """Build the company lookup index and save it under the 'companies' key.

    The index maps, for every company, each of its domains (verbatim), each
    of its optional aliases and its own name (both passed through
    ``utils.normalize_company_name``) to ``company['company_name']``.

    :param runtime_storage_inst: object providing ``set_by_key(key, value)``
    :param companies: iterable of dicts with ``domains`` and
        ``company_name`` keys and an optional ``aliases`` key
    """
    lookup = {}
    for item in companies:
        for domain in item['domains']:
            lookup[domain] = item['company_name']

        # Aliases are optional; absent key means no alias entries.
        for alias in item.get('aliases', ()):
            alias_key = utils.normalize_company_name(alias)
            lookup[alias_key] = item['company_name']

        own_key = utils.normalize_company_name(item['company_name'])
        lookup[own_key] = item['company_name']

    runtime_storage_inst.set_by_key('companies', lookup)
def _process_member(self, record):
    """Complete a member record, sync its user profile and yield the record.

    The record gets ``primary_key``, ``date``, ``author_name``, ``module``,
    ``author_email`` and ``company_name`` filled in. The company name is
    looked up in ``self.domains_index`` by the normalized company draft; when
    the lookup gives nothing truthy, the draft normalized by
    ``utils.normalize_company_draft`` is used instead. The matching user is
    then loaded, updated with the name and company, and stored back.

    :param record: mutable mapping with ``member_id``, ``date_joined``,
        ``member_name`` and ``company_draft`` keys
    :returns: generator yielding the updated record exactly once
    """
    member_uid = user_processor.make_user_id(member_id=record["member_id"])
    record["primary_key"] = member_uid
    record["date"] = utils.member_date_to_timestamp(record["date_joined"])
    record["author_name"] = record["member_name"]
    record["module"] = "unknown"

    draft = record["company_draft"]
    resolved = self.domains_index.get(utils.normalize_company_name(draft))
    if not resolved:
        resolved = utils.normalize_company_draft(draft)

    # author_email is the key used to create a new user
    record["author_email"] = member_uid
    record["company_name"] = resolved
    # _update_record_and_user creates the user when it does not exist yet
    self._update_record_and_user(record)
    # NOTE(review): re-applied after the call, presumably because
    # _update_record_and_user may overwrite the field -- confirm.
    record["company_name"] = resolved

    profile = user_processor.load_user(self.runtime_storage_inst, member_uid)
    profile["user_name"] = record["author_name"]
    profile["companies"] = [{"company_name": resolved, "end_date": 0}]
    profile["company_name"] = resolved
    user_processor.store_user(self.runtime_storage_inst, profile)

    record["company_name"] = resolved
    yield record
def _update_members_company_name(self):
    """Re-resolve company names of member records and yield changed ones.

    Walks every record from ``self.runtime_storage_inst``; only records whose
    ``record_type`` is ``'member'`` are considered. The company name is
    looked up in ``self.domains_index`` by the normalized company draft,
    falling back to ``utils.normalize_company_draft`` of the draft when the
    lookup gives nothing truthy. Records whose name is already up to date
    are skipped.

    The user profile is rewritten only after the consumer resumes the
    generator past the yielded record.

    :returns: generator of records whose ``company_name`` was changed
    """
    LOG.debug('Update members with company names')

    storage = self.runtime_storage_inst
    for member in storage.get_all_records():
        if member['record_type'] != 'member':
            continue

        draft = member['company_draft']
        resolved = self.domains_index.get(utils.normalize_company_name(draft))
        if not resolved:
            resolved = utils.normalize_company_draft(draft)

        if resolved == member['company_name']:
            continue

        LOG.debug('Update record %s, company name changed to %s',
                  member, resolved)
        member['company_name'] = resolved

        yield member

        # Runs once the generator is resumed after the yield above.
        profile = user_processor.load_user(self.runtime_storage_inst,
                                           member['user_id'])
        LOG.debug('Update user %s, company name changed to %s',
                  profile, resolved)
        profile['companies'] = [{'company_name': resolved, 'end_date': 0}]
        user_processor.store_user(self.runtime_storage_inst, profile)
def record_handler(record):
    """Refresh the company name of one member record.

    ``self`` is not a parameter; it is read from the enclosing scope.
    Records whose ``record_type`` is not ``'member'``, or whose company name
    is already up to date, produce nothing. Otherwise the record is updated
    and yielded, and the user profile is rewritten once the generator is
    resumed after the yield.

    :param record: mutable mapping describing a stored record
    :returns: generator yielding the record at most once
    """
    if record['record_type'] != 'member':
        return

    draft = record['company_draft']
    resolved = self.domains_index.get(utils.normalize_company_name(draft))
    if not resolved:
        # Nothing truthy in the index: fall back to the normalized draft.
        resolved = utils.normalize_company_draft(draft)

    if resolved == record['company_name']:
        return

    LOG.debug('Update record %s, company name changed to %s',
              record, resolved)
    record['company_name'] = resolved

    yield record

    # Executed only when the consumer resumes the generator.
    profile = user_processor.load_user(self.runtime_storage_inst,
                                       record['user_id'])
    LOG.debug('Update user %s, company name changed to %s',
              profile, resolved)
    profile['companies'] = [{'company_name': resolved, 'end_date': 0}]
    user_processor.store_user(self.runtime_storage_inst, profile)
def _process_member(self, record):
    """Populate a member record, update the related user, yield the record.

    Sets ``primary_key``, ``date``, ``author_name``, ``module``,
    ``author_email`` and ``company_name`` on the record. The company name
    comes from ``self.domains_index`` (keyed by the normalized company
    draft) or, when that lookup gives nothing truthy, from
    ``utils.normalize_company_draft``. The user is then loaded, given the
    member's name and a single-entry company list, and stored.

    :param record: mutable mapping with ``member_id``, ``date_joined``,
        ``member_name`` and ``company_draft`` keys
    :returns: generator yielding the updated record exactly once
    """
    uid = user_processor.make_user_id(member_id=record['member_id'])
    record['primary_key'] = uid
    record['date'] = utils.member_date_to_timestamp(record['date_joined'])
    record['author_name'] = record['member_name']
    record['module'] = 'unknown'

    draft_name = record['company_draft']
    company = self.domains_index.get(
        utils.normalize_company_name(draft_name))
    if not company:
        company = utils.normalize_company_draft(draft_name)

    # author_email is a key to create new user
    record['author_email'] = uid
    record['company_name'] = company
    # _update_record_and_user function will create new user if needed
    self._update_record_and_user(record)
    # NOTE(review): set again after the call, presumably in case the call
    # replaced the value -- confirm against _update_record_and_user.
    record['company_name'] = company

    stored_user = user_processor.load_user(self.runtime_storage_inst, uid)
    stored_user['user_name'] = record['author_name']
    stored_user['companies'] = [{'company_name': company, 'end_date': 0}]
    stored_user['company_name'] = company
    user_processor.store_user(self.runtime_storage_inst, stored_user)

    record['company_name'] = company
    yield record
def _update_members_company_name(self):
    """Yield member records whose resolved company name has changed.

    Every record returned by ``self.runtime_storage_inst.get_all_records()``
    with ``record_type == 'member'`` has its company re-resolved: first via
    ``self.domains_index`` keyed by the normalized company draft, then via
    ``utils.normalize_company_draft`` when the index gives nothing truthy.
    Unchanged records are skipped.

    The user is loaded and stored through ``utils.load_user`` and
    ``utils.store_user``, and only after the generator is resumed past the
    yielded record.

    :returns: generator of updated member records
    """
    LOG.debug('Update members with company names')

    for item in self.runtime_storage_inst.get_all_records():
        if item['record_type'] != 'member':
            continue

        draft_name = item['company_draft']
        new_name = self.domains_index.get(
            utils.normalize_company_name(draft_name))
        if not new_name:
            new_name = utils.normalize_company_draft(draft_name)

        if new_name == item['company_name']:
            continue

        LOG.debug('Update record %s, company name changed to %s',
                  item, new_name)
        item['company_name'] = new_name

        yield item

        # Runs once the generator is resumed after the yield above.
        owner = utils.load_user(self.runtime_storage_inst, item['user_id'])
        LOG.debug('Update user %s, company name changed to %s',
                  owner, new_name)
        owner['companies'] = [{'company_name': new_name, 'end_date': 0}]
        utils.store_user(self.runtime_storage_inst, owner)
def _process_member(self, record):
    """Turn a raw member entry into a full record and sync the user.

    The record receives ``primary_key``, ``date``, ``author_name``,
    ``module``, ``author_email`` and ``company_name``. The company name is
    taken from ``self.domains_index`` using the normalized company draft as
    key; when nothing truthy is found there, the result of
    ``utils.normalize_company_draft`` is used. Afterwards the user is
    loaded, updated and stored.

    :param record: mutable mapping with ``member_id``, ``date_joined``,
        ``member_name`` and ``company_draft`` keys
    :returns: generator yielding the updated record exactly once
    """
    new_user_id = user_processor.make_user_id(member_id=record['member_id'])
    record['primary_key'] = new_user_id
    record['date'] = utils.member_date_to_timestamp(record['date_joined'])
    record['author_name'] = record['member_name']
    record['module'] = 'unknown'

    raw_company = record['company_draft']
    index_key = utils.normalize_company_name(raw_company)
    employer = self.domains_index.get(index_key)
    if not employer:
        employer = utils.normalize_company_draft(raw_company)

    # author_email is a key to create new user
    record['author_email'] = new_user_id
    record['company_name'] = employer
    # _update_record_and_user function will create new user if needed
    self._update_record_and_user(record)
    # NOTE(review): assigned again after the call; presumably guards against
    # the call changing the field -- confirm.
    record['company_name'] = employer

    account = user_processor.load_user(self.runtime_storage_inst, new_user_id)
    account['user_name'] = record['author_name']
    account['companies'] = [{'company_name': employer, 'end_date': 0}]
    account['company_name'] = employer
    user_processor.store_user(self.runtime_storage_inst, account)

    record['company_name'] = employer
    yield record
def test_resolve_companies_aliases(self):
    """Adjacent entries that resolve to the same company are merged.

    Both IBM aliases map to 'IBM'; the expected result keeps a single IBM
    entry carrying the later end date, followed by the Intel entry.
    """
    alias_index = {
        utils.normalize_company_name(alias): 'IBM'
        for alias in ('IBM India', 'IBM Japan')
    }
    history = [
        {'company_name': 'IBM India', 'end_date': 1234567890},
        {'company_name': 'IBM Japan', 'end_date': 2234567890},
        {'company_name': 'Intel', 'end_date': 0},
    ]

    actual = user_processor.resolve_companies_aliases(alias_index, history)

    wanted = [
        {'company_name': 'IBM', 'end_date': 2234567890},
        {'company_name': 'Intel', 'end_date': 0},
    ]
    self.assertEqual(wanted, actual)
def _get_changed_member_records(runtime_storage_inst, record_processor_inst):
    """Yield member records whose company name differs from the index.

    Only records with ``record_type == 'member'`` that already carry a
    ``company_name`` key are examined. The new name is looked up in
    ``record_processor_inst.domains_index`` by the normalized company draft;
    when the lookup gives nothing truthy the raw draft itself is used. A
    record is updated in place and yielded only when the name changed.

    :param runtime_storage_inst: object providing ``get_all_records()``
    :param record_processor_inst: object exposing a ``domains_index`` mapping
    :returns: generator of updated member records
    """
    for entry in runtime_storage_inst.get_all_records():
        if entry['record_type'] != 'member' or 'company_name' not in entry:
            continue

        draft = entry['company_draft']
        resolved = record_processor_inst.domains_index.get(
            utils.normalize_company_name(draft))
        if not resolved:
            # Unlike a normalized fallback, the draft is used unchanged here.
            resolved = draft

        if resolved != entry['company_name']:
            entry['company_name'] = resolved
            yield entry
def test_normalize_company_name(self):
    """Check ``utils.normalize_company_name`` against known input/output pairs."""
    # (raw company name, expected normalized form)
    cases = (
        ('EMC Corporation', 'emc'),
        ('Abc, corp..', 'abc'),
        ('Mirantis IT.', 'mirantis'),
        ('Red Hat, Inc.', 'redhat'),
        ('abc s.r.o. ABC', 'abcabc'),
        ('2s.r.o. co', '2sro'),
        ('AL.P.B L.P. s.r.o. s.r.o. C ltd.', 'alpbc'),
    )

    actual = [utils.normalize_company_name(raw) for raw, _ in cases]
    expected = [normalized for _, normalized in cases]

    self.assertEqual(actual, expected)
def resolve_companies_aliases(domains_index, companies):
    """Replace company names with resolved ones and merge adjacent repeats.

    Each entry's ``company_name`` is looked up in ``domains_index`` by its
    normalized form, falling back to ``utils.normalize_company_draft`` when
    the lookup gives nothing truthy. The list is walked from the end, so
    within a run of entries resolving to the same name only the last entry
    of the run is kept. Input entries are deep-copied, never modified.

    :param domains_index: mapping from normalized name to company name
    :param companies: reversible sequence of dicts with a ``company_name`` key
    :returns: new list of copied entries in the original order
    """
    merged = []
    last_seen = None

    for entry in reversed(companies):
        raw_name = entry['company_name']
        resolved = domains_index.get(utils.normalize_company_name(raw_name))
        if not resolved:
            resolved = utils.normalize_company_draft(raw_name)

        if resolved != last_seen:
            clone = copy.deepcopy(entry)
            clone['company_name'] = resolved
            merged.append(clone)
            last_seen = resolved

    # Entries were collected back to front; restore the original order.
    merged.reverse()
    return merged
def record_handler(record):
    """Update one member record's company name and the owning user.

    ``self`` comes from the enclosing scope, not from the arguments.
    Nothing is yielded for records whose ``record_type`` is not ``"member"``
    or whose company name already matches the resolved one. Otherwise the
    record is modified and yielded; the user's company list is replaced when
    the generator is resumed after that yield.

    :param record: mutable mapping describing a stored record
    :returns: generator yielding the record at most once
    """
    if record["record_type"] != "member":
        return

    draft_name = record["company_draft"]
    new_name = self.domains_index.get(utils.normalize_company_name(draft_name))
    if not new_name:
        # Index gave nothing truthy: use the normalized draft instead.
        new_name = utils.normalize_company_draft(draft_name)

    if new_name == record["company_name"]:
        return

    LOG.debug("Update record %s, company name changed to %s", record, new_name)
    record["company_name"] = new_name

    yield record

    # Reached only when the consumer resumes the generator.
    owner = user_processor.load_user(self.runtime_storage_inst, record["user_id"])
    LOG.debug("Update user %s, company name changed to %s", owner, new_name)
    owner["companies"] = [{"company_name": new_name, "end_date": 0}]
    user_processor.store_user(self.runtime_storage_inst, owner)