def save_to_db(self, records):
    """Queue every FOP record of a batch, then bulk-create the batch.

    For each record the child elements are read through ``record.xpath``.
    The FOP is updated when its hash code is already a key of
    ``self.all_fops_dict``; otherwise it is created and remembered there.
    Activity kinds and exchange data are handed to the ``add_*_to_list``
    helpers. After the loop the queued FOPs are committed, the FOP create
    queue is emptied and the KVED / exchange-data rows are saved.

    Args:
        records: iterable of parsed XML elements. Every tag read here must
            be present, because the first xpath match is indexed directly.

    Raises:
        IndexError: if a record lacks one of the expected child elements.
        TypeError: if NAME or ADDRESS has no text (they are concatenated).
    """

    def text_of(record, tag):
        # Text of the first element matching ``tag``.
        return record.xpath(tag)[0].text

    def split_date_and_info(text):
        # The first word is treated as the date, the remainder as the info.
        if not text:
            return None, None
        return (
            format_date_to_yymmdd(get_first_word(text)),
            cut_first_word(text),
        )

    for record in records:
        registration_text = text_of(record, 'REGISTRATION')
        termination_text = text_of(record, 'TERMINATED_INFO')
        status = self.save_or_get_status(text_of(record, 'STAN'))
        # Both texts go through the same module-level helpers; previously
        # only the registration branch used ``self.get_first_word``.
        registration_date, registration_info = split_date_and_info(
            registration_text)
        estate_manager = text_of(record, 'ESTATE_MANAGER')
        termination_date, terminated_info = split_date_and_info(
            termination_text)
        termination_cancel_info = text_of(record, 'TERMINATION_CANCEL_INFO')
        contact_info = text_of(record, 'CONTACTS')
        vp_dates = text_of(record, 'VP_DATES')
        authority = self.save_or_get_authority(
            text_of(record, 'CURRENT_AUTHORITY'))
        fullname = text_of(record, 'NAME')
        address = text_of(record, 'ADDRESS')
        # NOTE(review): built-in hash() of a str is salted per interpreter
        # process unless PYTHONHASHSEED is fixed. If hash_code is persisted
        # or all_fops_dict is filled from stored values, confirm that the
        # codes are comparable between runs.
        hash_code = abs(hash(fullname + address)) % (10**9)

        if hash_code in self.all_fops_dict:
            fop = self.update_fop_fields(
                hash_code, status, registration_date, registration_info,
                estate_manager, termination_date, terminated_info,
                termination_cancel_info, contact_info, vp_dates, authority)
        else:
            fop = self.create_new_fop(
                hash_code, fullname, address, status, registration_date,
                registration_info, estate_manager, termination_date,
                terminated_info, termination_cancel_info, contact_info,
                vp_dates, authority)
            self.all_fops_dict[hash_code] = fop

        fop_kveds = record.xpath('ACTIVITY_KINDS')[0]
        # len() of the element is used as the "has content" check.
        if len(fop_kveds):
            self.add_fop_kveds_to_list(fop_kveds, hash_code)
        exchange_data = record.xpath('EXCHANGE_DATA')[0]
        if len(exchange_data):
            self.add_exchange_data_to_list(exchange_data, hash_code)
        self.bulk_manager.add_create(fop)

    self.bulk_manager._commit_create(Fop)
    # NOTE(review): fixed 3-second pause after the bulk insert; the reason
    # is not visible in this method - confirm it is still required.
    time.sleep(3)
    self.bulk_manager._create_queues['business_register.Fop'] = []
    self.save_fop_kveds_to_db()
    self.save_exchange_data_to_db()
def save_detail_company_to_db(self, records):
    """Create or update one ``Company`` per record, then commit founders.

    A record without NAME or EDRPOU text is counted in
    ``self.report.invalid_data`` and skipped. Companies are looked up by
    ``code`` (lower-cased name + EDRPOU). A missing company is saved with
    all parsed fields, including the address; an existing one is saved only
    with the fields whose values differ (plus ``updated_at``). Founders are
    passed to ``self.save_or_update_founders`` and the queued ``Founder``
    objects are committed once the whole batch has been processed.

    Args:
        records: iterable of parsed XML elements. Every tag read here must
            be present, because the first xpath match is indexed directly.

    Raises:
        IndexError: if a record lacks one of the expected child elements.
        ValueError: if AUTHORIZED_CAPITAL is not a number after replacing
            the decimal comma.
    """

    def text_of(record, tag):
        # Text of the first element matching ``tag``.
        return record.xpath(tag)[0].text

    for record in records:
        name = text_of(record, 'NAME')
        if not name:
            # An empty NAME used to raise AttributeError on .lower();
            # treat it like a missing EDRPOU instead.
            self.report.invalid_data += 1
            continue
        name = name.lower()

        short_name = text_of(record, 'SHORT_NAME')
        if short_name:
            short_name = short_name.lower()

        company_type = text_of(record, 'OPF')
        if company_type:
            company_type = self.save_or_get_company_type(company_type, 'uk')

        edrpou = text_of(record, 'EDRPOU')
        if not edrpou:
            self.report.invalid_data += 1
            continue

        code = name + edrpou
        address = text_of(record, 'ADDRESS')
        status = self.save_or_get_status(text_of(record, 'STAN'))

        authorized_capital = text_of(record, 'AUTHORIZED_CAPITAL')
        if authorized_capital:
            # The source uses a decimal comma.
            authorized_capital = float(authorized_capital.replace(',', '.'))

        bylaw = self.save_or_get_bylaw(text_of(record, 'STATUTE'))

        registration_date = None
        registration_info = None
        registration = text_of(record, 'REGISTRATION')
        if registration:
            # The first word is treated as the date, the rest as the info.
            registration_date = format_date_to_yymmdd(
                get_first_word(registration))
            registration_info = cut_first_word(registration)

        contact_info = text_of(record, 'CONTACTS')
        authority = self.save_or_get_authority(
            text_of(record, 'CURRENT_AUTHORITY'))

        # TODO: refactor branches storing.
        # TODO: the following sections of a record are not stored yet:
        # ACTIVITY_KINDS, SIGNERS, BRANCHES, TERMINATION_STARTED_INFO,
        # BANKRUPTCY_READJUSTMENT_INFO, PREDECESSORS, ASSIGNEES,
        # EXCHANGE_DATA and the company detail fields
        # (FOUNDING_DOCUMENT_NUM, EXECUTIVE_POWER, SUPERIOR_MANAGEMENT,
        # MANAGING_PAPER, TERMINATED_INFO, TERMINATION_CANCEL_INFO,
        # VP_DATES).

        # Fields compared on update, in the order they are reported to
        # save(update_fields=...).
        tracked = (
            ('name', name),
            ('short_name', short_name),
            ('company_type', company_type),
            ('authorized_capital', authorized_capital),
            ('address', address),
            ('status', status),
            ('bylaw', bylaw),
            ('registration_date', registration_date),
            ('registration_info', registration_info),
            ('contact_info', contact_info),
            ('authority', authority),
        )

        # TODO: resolve the problem of having records with the same company
        # name and edrpou that results in the same code.
        company = Company.objects.filter(code=code).first()
        if not company:
            # address is part of ``tracked`` and is now stored on creation
            # as well (it used to be filled in only by a later update).
            company = Company(edrpou=edrpou, code=code, **dict(tracked))
            company.save()
        else:
            update_fields = []
            for field, incoming in tracked:
                stored = getattr(company, field)
                if field == 'registration_date':
                    # The parsed date is compared against the stored value
                    # converted by the project helper.
                    stored = to_lower_string_if_exists(stored)
                if stored != incoming:
                    setattr(company, field, incoming)
                    update_fields.append(field)
            if update_fields:
                update_fields.append('updated_at')
                company.save(update_fields=update_fields)

        founders = record.xpath('FOUNDERS')[0]
        # len() of the element is used as the "has content" check.
        if len(founders):
            self.save_or_update_founders(founders, company)

    if len(self.bulk_manager.queues['business_register.Founder']):
        self.bulk_manager.commit(Founder)
    self.bulk_manager.queues['business_register.Founder'] = []
def save_detailed_fop_to_db(self, records):
    """Create or update one ``Fop`` per record, then commit related rows.

    Records without NAME text are logged, counted in
    ``self.report.invalid_data`` and skipped; records whose name is longer
    than 100 characters are logged and skipped without being counted.
    FOPs are looked up by ``code`` (lower-cased full name + address, with
    ``'EMPTY'`` standing in for a missing address).

    A new FOP is queued in the bulk manager and its activity kinds and
    exchange data are kept in ``self.new_fops_foptokveds`` /
    ``self.new_fops_exchange_data`` until the FOP itself is committed. An
    existing FOP is saved with only the fields that changed (plus
    ``updated_at``) and its activity kinds / exchange data are updated
    directly.

    Args:
        records: iterable of parsed XML elements. Every tag read here must
            be present, because the first xpath match is indexed directly.

    Raises:
        IndexError: if a record lacks one of the expected child elements.
    """
    fop_key = 'business_register.Fop'
    kved_key = 'business_register.FopToKved'
    exchange_key = 'business_register.ExchangeDataFop'
    date_fields = ('registration_date', 'termination_date')

    def text_of(record, tag):
        # Text of the first element matching ``tag``.
        return record.xpath(tag)[0].text

    def split_date_and_info(text):
        # The first word is treated as the date, the remainder as the info.
        if not text:
            return None, None
        return (
            format_date_to_yymmdd(get_first_word(text)),
            cut_first_word(text),
        )

    for record in records:
        fullname = text_of(record, 'NAME')
        if not fullname:
            logger.warning(f'ФОП без прізвища: {record}')
            self.report.invalid_data += 1
            continue
        if len(fullname) > 100:
            # Skipped, but not counted in report.invalid_data.
            logger.warning(f'ФОП із задовгим прізвищем: {record}')
            continue
        fullname = fullname.lower()
        address = text_of(record, 'ADDRESS') or 'EMPTY'
        code = fullname + address

        status = self.save_or_get_status(text_of(record, 'STAN'))
        registration_date, registration_info = split_date_and_info(
            text_of(record, 'REGISTRATION'))
        estate_manager = text_of(record, 'ESTATE_MANAGER')
        termination_date, terminated_info = split_date_and_info(
            text_of(record, 'TERMINATED_INFO'))
        termination_cancel_info = text_of(record, 'TERMINATION_CANCEL_INFO')
        contact_info = text_of(record, 'CONTACTS')
        vp_dates = text_of(record, 'VP_DATES')
        authority = self.save_or_get_authority(
            text_of(record, 'CURRENT_AUTHORITY'))
        fop_kveds = record.xpath('ACTIVITY_KINDS')[0]
        exchange_data = record.xpath('EXCHANGE_DATA')[0]

        # Fields compared on update, in the order they are reported to
        # save(update_fields=...).
        tracked = (
            ('status', status),
            ('registration_date', registration_date),
            ('registration_info', registration_info),
            ('estate_manager', estate_manager),
            ('termination_date', termination_date),
            ('terminated_info', terminated_info),
            ('termination_cancel_info', termination_cancel_info),
            ('contact_info', contact_info),
            ('vp_dates', vp_dates),
            ('authority', authority),
        )

        fop = Fop.objects.filter(code=code).first()
        if not fop:
            fop = Fop(fullname=fullname, address=address, code=code,
                      **dict(tracked))
            self.bulk_manager.add(fop)
            # len() of the element is used as the "has content" check.
            if len(fop_kveds):
                self.add_fop_kveds_to_dict(fop_kveds, code)
            if len(exchange_data):
                self.add_fop_exchange_data_to_dict(exchange_data, code)
        else:
            # TODO: make a decision: our algorithm when Fop changes
            # fullname or address?
            update_fields = []
            for field, incoming in tracked:
                stored = getattr(fop, field)
                if field in date_fields:
                    # Compare dates as strings, keeping an empty stored
                    # date as None so that it can be replaced by an
                    # incoming date (previously such FOPs never got one).
                    stored = str(stored) if stored else None
                if stored != incoming:
                    setattr(fop, field, incoming)
                    update_fields.append(field)
            if update_fields:
                update_fields.append('updated_at')
                fop.save(update_fields=update_fields)
            if len(fop_kveds):
                self.update_fop_kveds(fop_kveds, fop)
            if len(exchange_data):
                self.update_fop_exchange_data(exchange_data, fop)

    if len(self.bulk_manager.queues[fop_key]):
        self.bulk_manager.commit(Fop)

    # The committed FOPs are still in the queue here; attach them to the
    # rows that were waiting for them.
    for fop in self.bulk_manager.queues[fop_key]:
        for foptokved in self.new_fops_foptokveds.get(fop.code, ()):
            foptokved.fop = fop
            self.bulk_manager.add(foptokved)
    self.new_fops_foptokveds = {}

    for fop in self.bulk_manager.queues[fop_key]:
        for exchangedata in self.new_fops_exchange_data.get(fop.code, ()):
            exchangedata.fop = fop
            self.bulk_manager.add(exchangedata)
    self.new_fops_exchange_data = {}

    self.bulk_manager.queues[fop_key] = []
    if len(self.bulk_manager.queues[kved_key]):
        self.bulk_manager.commit(FopToKved)
    if len(self.bulk_manager.queues[exchange_key]):
        self.bulk_manager.commit(ExchangeDataFop)
    self.bulk_manager.queues[kved_key] = []
    self.bulk_manager.queues[exchange_key] = []
def save_to_db(self, records):
    """Bulk-create the companies of a batch together with related rows.

    Per record: the authority, status, bylaw and company type are resolved
    through the ``save_or_get_*`` helpers, a company object is built with
    ``self.company_create`` and queued for creation, and the ``add_*``
    helpers are called for branches, assignees, details, KVEDs,
    bankruptcy data, exchange data, founders, predecessors, signers and
    termination info.

    After the loop:
      1. queued companies are committed and indexed by ``hash_code``;
      2. branches get their ``parent`` via ``self.branch_to_parent``;
      3. rows in the main manager's create queues get their ``company``
         and are committed;
      4. branch companies are committed and the rows in the branch
         manager's create queues are linked to them and committed.
    Every queue touched here is emptied afterwards.

    Args:
        records: iterable of parsed XML elements. Every tag read here must
            be present, because the first xpath match is indexed directly.

    Raises:
        IndexError: if a record lacks one of the expected child elements.
        KeyError: if a queued row's ``hash_code`` matches no company of
            the batch.
    """
    company_key = 'business_register.Company'
    # (model, queue key) pairs in the order the rows are committed.
    company_children = (
        (Assignee, 'business_register.Assignee'),
        (FounderFull, 'business_register.FounderFull'),
        (BancruptcyReadjustment, 'business_register.BancruptcyReadjustment'),
        (CompanyDetail, 'business_register.CompanyDetail'),
        (CompanyToKved, 'business_register.CompanyToKved'),
        (ExchangeDataCompany, 'business_register.ExchangeDataCompany'),
        (CompanyToPredecessor, 'business_register.CompanyToPredecessor'),
        (Signer, 'business_register.Signer'),
        (TerminationStarted, 'business_register.TerminationStarted'),
    )
    branch_children = (
        (CompanyToKved, 'business_register.CompanyToKved'),
        (ExchangeDataCompany, 'business_register.ExchangeDataCompany'),
        (Signer, 'business_register.Signer'),
    )

    def by_hash_code(companies):
        # Index company objects by their hash_code.
        return {company.hash_code: company for company in companies}

    def pick_company(hash_code, updated, created):
        # Updated companies take precedence over created ones.
        if hash_code in updated:
            return updated[hash_code]
        return created[hash_code]

    def link_commit_and_reset(manager, children, updated, created,
                              reset=True):
        # Point every queued row at its company, then commit per model.
        for _, key in children:
            for item in manager._create_queues[key]:
                item.company = pick_company(item.hash_code, updated, created)
        for model, _ in children:
            manager._commit_create(model)
        if reset:
            for _, key in children:
                manager._create_queues[key] = []

    self.bylaw = None
    self.company_type = None
    for record in records:
        self.authority = self.save_or_get_authority(
            record.xpath('CURRENT_AUTHORITY')[0].text)
        self.status = self.save_or_get_status(record.xpath('STAN')[0].text)
        # Return values are ignored here; presumably these helpers store
        # their result on self.bylaw / self.company_type - confirm.
        self.save_or_get_bylaw(record.xpath('STATUTE')[0].text)
        self.save_or_get_company_type(record.xpath('OPF')[0].text)
        edrpou = record.xpath('EDRPOU')[0].text or Company.INVALID

        registration_date = None
        registration_info = None
        registration = record.xpath('REGISTRATION')[0].text
        if registration:
            # The first word is treated as the date, the rest as the info;
            # empty results are normalised to None.
            registration_date = format_date_to_yymmdd(
                get_first_word(registration)) or None
            registration_info = cut_first_word(registration) or None

        # TODO: this method only queues creations. Looking up an existing
        # company by hash code and queueing it with add_update is not
        # implemented yet.
        company = self.company_create(
            record, edrpou, registration_date, registration_info)
        self.bulk_manager.add_create(company)

        self.add_branches(record, edrpou)
        self.add_assignees(record, edrpou)
        self.add_company_detail(record, edrpou)
        self.add_company_to_kved(
            record.xpath('ACTIVITY_KINDS')[0],
            record.xpath('NAME')[0].text, edrpou)
        self.add_bancruptcy_readjustment(record, edrpou)
        self.add_exchange_data(
            record.xpath('EXCHANGE_DATA')[0],
            record.xpath('NAME')[0].text, edrpou)
        self.add_founders(record, edrpou)
        self.add_company_to_predecessors(record, edrpou)
        self.add_signers(record, edrpou)
        self.add_termination_started(record, edrpou)

    # 1. Commit the companies and index them before emptying the queues.
    manager = self.bulk_manager
    if len(manager._update_queues[company_key]) > 0:
        manager._commit_update(
            Company, ['name', 'short_name', 'company_type', 'edrpou'])
    manager._commit_create(Company)
    updated = by_hash_code(manager._update_queues[company_key])
    created = by_hash_code(manager._create_queues[company_key])
    manager._update_queues[company_key] = []
    manager._create_queues[company_key] = []

    # 2. Resolve each branch's parent among the committed companies.
    branch_manager = self.branch_bulk_manager
    for queue in (branch_manager._create_queues[company_key],
                  branch_manager._update_queues[company_key]):
        for branch in queue:
            branch.parent = pick_company(
                self.branch_to_parent[branch.hash_code], updated, created)
    # Reset the instance mapping for the next batch (this used to bind an
    # unused local, so the mapping kept growing).
    self.branch_to_parent = {}

    # 3. Link and commit the rows that belong to the companies; their
    # queues are emptied only after the branch companies are committed.
    link_commit_and_reset(
        manager, company_children, updated, created, reset=False)

    # 4. Commit the branch companies and index them.
    if len(branch_manager._update_queues[company_key]) > 0:
        branch_manager._commit_update(Company, ['name', 'short_name'])
    branch_manager._commit_create(Company)
    updated_branches = by_hash_code(
        branch_manager._update_queues[company_key])
    created_branches = by_hash_code(
        branch_manager._create_queues[company_key])

    for _, key in company_children:
        manager._create_queues[key] = []
    branch_manager._update_queues[company_key] = []
    branch_manager._create_queues[company_key] = []

    link_commit_and_reset(
        branch_manager, branch_children, updated_branches, created_branches)