示例#1
0
 def save_to_db(self, records):
     """Create or update one Fop per record, then flush the queued rows.

     Every field is read through ``record.xpath(<TAG>)[0].text``.  A record
     whose hash code is already a key of ``self.all_fops_dict`` goes through
     ``self.update_fop_fields``; otherwise ``self.create_new_fop`` builds a
     new Fop, which is then remembered in ``self.all_fops_dict``.  After the
     loop the Fop create-queue is committed and emptied, and the collected
     KVED and exchange-data rows are saved.

     :param records: iterable of parsed record elements exposing ``xpath``.
         Every tag read below must be present, otherwise ``[0]`` raises
         ``IndexError``.
     """
     for record in records:
         registration_text = record.xpath('REGISTRATION')[0].text
         termination_text = record.xpath('TERMINATED_INFO')[0].text
         status = self.save_or_get_status(record.xpath('STAN')[0].text)
         # REGISTRATION is split into its first word (the date) and the
         # remainder (the info); both stay None when the tag has no text.
         registration_date = None
         registration_info = None
         if registration_text:
             registration_date = format_date_to_yymmdd(
                 get_first_word(registration_text))
             registration_info = cut_first_word(registration_text)
         estate_manager = record.xpath('ESTATE_MANAGER')[0].text
         # TERMINATED_INFO is split the same way as REGISTRATION.
         termination_date = None
         terminated_info = None
         if termination_text:
             termination_date = format_date_to_yymmdd(
                 get_first_word(termination_text))
             terminated_info = cut_first_word(termination_text)
         termination_cancel_info = record.xpath(
             'TERMINATION_CANCEL_INFO')[0].text
         contact_info = record.xpath('CONTACTS')[0].text
         vp_dates = record.xpath('VP_DATES')[0].text
         authority = self.save_or_get_authority(
             record.xpath('CURRENT_AUTHORITY')[0].text)
         fullname = record.xpath('NAME')[0].text
         address = record.xpath('ADDRESS')[0].text
         # NOTE(review): built-in hash() of a str is salted per interpreter
         # process unless PYTHONHASHSEED is fixed, so this code only matches
         # keys of all_fops_dict computed in the same process - confirm how
         # all_fops_dict is populated.
         # NOTE(review): raises TypeError when NAME or ADDRESS has no text.
         hash_code = abs(hash(fullname + address)) % (10**9)
         if hash_code in self.all_fops_dict:
             fop = self.update_fop_fields(hash_code, status,
                                          registration_date,
                                          registration_info, estate_manager,
                                          termination_date, terminated_info,
                                          termination_cancel_info,
                                          contact_info, vp_dates, authority)
         else:
             fop = self.create_new_fop(hash_code, fullname, address, status,
                                       registration_date, registration_info,
                                       estate_manager, termination_date,
                                       terminated_info,
                                       termination_cancel_info,
                                       contact_info, vp_dates, authority)
             self.all_fops_dict[hash_code] = fop
         # len() is used instead of truthiness on purpose: presumably these
         # are lxml elements, where len() is the number of children.
         fop_kveds = record.xpath('ACTIVITY_KINDS')[0]
         if len(fop_kveds):
             self.add_fop_kveds_to_list(fop_kveds, hash_code)
         exchange_data = record.xpath('EXCHANGE_DATA')[0]
         if len(exchange_data):
             self.add_exchange_data_to_list(exchange_data, hash_code)
         # NOTE(review): updated Fops are queued for creation too - confirm
         # that update_fop_fields / the bulk manager expect this.
         self.bulk_manager.add_create(fop)
     self.bulk_manager._commit_create(Fop)
     # NOTE(review): the reason for this fixed pause is not visible here.
     time.sleep(3)
     self.bulk_manager._create_queues['business_register.Fop'] = []
     self.save_fop_kveds_to_db()
     self.save_exchange_data_to_db()
示例#2
0
 def save_detail_company_to_db(self, records):
     """Create or update a Company for every record and store its founders.

     Each record is read through ``record.xpath(<TAG>)[0].text``.  Companies
     are looked up by ``code`` (lower-cased name + EDRPOU) and saved one by
     one: a missing company is created, an existing one is saved only with
     the fields whose values changed (plus ``updated_at``).  Founders are
     handed to ``self.save_or_update_founders``; the Founder queue of the
     bulk manager is committed and emptied at the end.

     Records without a NAME or without an EDRPOU are counted in
     ``self.report.invalid_data`` and skipped.

     :param records: iterable of parsed record elements exposing ``xpath``.
         Every tag read below must be present, otherwise ``[0]`` raises
         ``IndexError``.
     """
     for record in records:
         name = record.xpath('NAME')[0].text
         if name is None:
             # Without a name the lookup code cannot be built; count the
             # record as invalid, as is done for a missing EDRPOU.
             self.report.invalid_data += 1
             continue
         name = name.lower()
         short_name = record.xpath('SHORT_NAME')[0].text
         if short_name:
             short_name = short_name.lower()
         company_type = record.xpath('OPF')[0].text
         if company_type:
             company_type = self.save_or_get_company_type(
                 company_type, 'uk')
         edrpou = record.xpath('EDRPOU')[0].text
         if not edrpou:
             self.report.invalid_data += 1
             continue
         code = name + edrpou
         address = record.xpath('ADDRESS')[0].text
         status = self.save_or_get_status(record.xpath('STAN')[0].text)
         authorized_capital = record.xpath('AUTHORIZED_CAPITAL')[0].text
         if authorized_capital:
             # A decimal comma is replaced by a dot before conversion.
             authorized_capital = float(
                 authorized_capital.replace(',', '.'))
         bylaw = self.save_or_get_bylaw(record.xpath('STATUTE')[0].text)
         # REGISTRATION is split into its first word (the date) and the
         # remainder (the info); both stay None when the tag has no text.
         registration_date = None
         registration_info = None
         registration = record.xpath('REGISTRATION')[0].text
         if registration:
             registration_date = format_date_to_yymmdd(
                 get_first_word(registration))
             registration_info = cut_first_word(registration)
         contact_info = record.xpath('CONTACTS')[0].text
         authority = self.save_or_get_authority(
             record.xpath('CURRENT_AUTHORITY')[0].text)
         # TODO: the following parts of a record are not stored yet:
         # ACTIVITY_KINDS, SIGNERS, BRANCHES, TERMINATION_STARTED_INFO,
         # BANKRUPTCY_READJUSTMENT_INFO, PREDECESSORS, ASSIGNEES,
         # EXCHANGE_DATA and the company detail fields
         # (FOUNDING_DOCUMENT_NUM, EXECUTIVE_POWER, SUPERIOR_MANAGEMENT,
         # MANAGING_PAPER, TERMINATED_INFO, TERMINATION_CANCEL_INFO,
         # VP_DATES).
         # TODO: resolve the problem of records sharing the same company
         # name and EDRPOU, which results in the same code.
         company = Company.objects.filter(code=code).first()
         if not company:
             company = Company(name=name,
                               short_name=short_name,
                               company_type=company_type,
                               edrpou=edrpou,
                               authorized_capital=authorized_capital,
                               address=address,
                               status=status,
                               bylaw=bylaw,
                               registration_date=registration_date,
                               registration_info=registration_info,
                               contact_info=contact_info,
                               authority=authority,
                               code=code)
             company.save()
         else:
             parsed_values = (
                 ('name', name),
                 ('short_name', short_name),
                 ('company_type', company_type),
                 ('authorized_capital', authorized_capital),
                 ('address', address),
                 ('status', status),
                 ('bylaw', bylaw),
                 ('registration_date', registration_date),
                 ('registration_info', registration_info),
                 ('contact_info', contact_info),
                 ('authority', authority),
             )
             update_fields = []
             for field, new_value in parsed_values:
                 current = getattr(company, field)
                 if field == 'registration_date':
                     # The stored date is compared in the form produced by
                     # to_lower_string_if_exists.
                     current = to_lower_string_if_exists(current)
                 if current != new_value:
                     setattr(company, field, new_value)
                     update_fields.append(field)
             if update_fields:
                 update_fields.append('updated_at')
                 company.save(update_fields=update_fields)
         founders = record.xpath('FOUNDERS')[0]
         # len() rather than truthiness: presumably an lxml element, where
         # len() is the number of children.
         if len(founders):
             self.save_or_update_founders(founders, company)
     if len(self.bulk_manager.queues['business_register.Founder']):
         self.bulk_manager.commit(Founder)
     self.bulk_manager.queues['business_register.Founder'] = []
示例#3
0
 def save_detailed_fop_to_db(self, records):
     """Create or update a Fop for every record, with KVEDs and exchange data.

     Each record is read through ``record.xpath(<TAG>)[0].text``.  Fops are
     looked up by ``code`` (lower-cased full name + address, with 'EMPTY'
     standing in for a missing address).  New Fops are queued on the bulk
     manager and their KVED / exchange-data rows are collected per code;
     existing Fops are saved only with the fields that changed (plus
     ``updated_at``) and their KVEDs / exchange data are updated directly.

     After the loop the queued Fops are committed, the collected child rows
     are attached to them, queued and committed, and all queues are emptied.

     Records without a NAME are logged, counted in
     ``self.report.invalid_data`` and skipped; records whose NAME is longer
     than 100 characters are logged and skipped.

     :param records: iterable of parsed record elements exposing ``xpath``.
         Every tag read below must be present, otherwise ``[0]`` raises
         ``IndexError``.
     """
     for record in records:
         fullname = record.xpath('NAME')[0].text
         if not fullname:
             logger.warning(f'ФОП без прізвища: {record}')
             self.report.invalid_data += 1
             continue
         if len(fullname) > 100:
             logger.warning(f'ФОП із задовгим прізвищем: {record}')
             continue
         fullname = fullname.lower()
         address = record.xpath('ADDRESS')[0].text
         if not address:
             address = 'EMPTY'
         code = fullname + address
         status = self.save_or_get_status(record.xpath('STAN')[0].text)
         registration_text = record.xpath('REGISTRATION')[0].text
         # REGISTRATION is split into its first word (the date) and the
         # remainder (the info); both stay None when the tag has no text.
         registration_date = None
         registration_info = None
         if registration_text:
             registration_date = format_date_to_yymmdd(
                 get_first_word(registration_text))
             registration_info = cut_first_word(registration_text)
         estate_manager = record.xpath('ESTATE_MANAGER')[0].text
         termination_text = record.xpath('TERMINATED_INFO')[0].text
         # TERMINATED_INFO is split the same way as REGISTRATION.
         termination_date = None
         terminated_info = None
         if termination_text:
             termination_date = format_date_to_yymmdd(
                 get_first_word(termination_text))
             terminated_info = cut_first_word(termination_text)
         termination_cancel_info = record.xpath(
             'TERMINATION_CANCEL_INFO')[0].text
         contact_info = record.xpath('CONTACTS')[0].text
         vp_dates = record.xpath('VP_DATES')[0].text
         authority = self.save_or_get_authority(
             record.xpath('CURRENT_AUTHORITY')[0].text)
         fop_kveds = record.xpath('ACTIVITY_KINDS')[0]
         exchange_data = record.xpath('EXCHANGE_DATA')[0]
         fop = Fop.objects.filter(code=code).first()
         if not fop:
             fop = Fop(
                 fullname=fullname,
                 address=address,
                 status=status,
                 registration_date=registration_date,
                 registration_info=registration_info,
                 estate_manager=estate_manager,
                 termination_date=termination_date,
                 terminated_info=terminated_info,
                 termination_cancel_info=termination_cancel_info,
                 contact_info=contact_info,
                 vp_dates=vp_dates,
                 authority=authority,
                 code=code)
             self.bulk_manager.add(fop)
             # Child rows of a new Fop are collected per code and attached
             # after the Fop queue has been committed (see below).
             if len(fop_kveds):
                 self.add_fop_kveds_to_dict(fop_kveds, code)
             if len(exchange_data):
                 self.add_fop_exchange_data_to_dict(exchange_data, code)
         else:
             # TODO: make a decision: our algorithm when Fop changes fullname or address?
             parsed_values = (
                 ('status', status),
                 ('registration_date', registration_date),
                 ('registration_info', registration_info),
                 ('estate_manager', estate_manager),
                 ('termination_date', termination_date),
                 ('terminated_info', terminated_info),
                 ('termination_cancel_info', termination_cancel_info),
                 ('contact_info', contact_info),
                 ('vp_dates', vp_dates),
                 ('authority', authority),
             )
             update_fields = []
             for field, new_value in parsed_values:
                 current = getattr(fop, field)
                 if field in ('registration_date', 'termination_date'):
                     # Compare the stored date as text, and an empty stored
                     # date as None, so that a date which appears for the
                     # first time (e.g. a new termination) is written too.
                     current = str(current) if current else None
                 if current != new_value:
                     setattr(fop, field, new_value)
                     update_fields.append(field)
             if update_fields:
                 update_fields.append('updated_at')
                 fop.save(update_fields=update_fields)
             if len(fop_kveds):
                 self.update_fop_kveds(fop_kveds, fop)
             if len(exchange_data):
                 self.update_fop_exchange_data(exchange_data, fop)
     if len(self.bulk_manager.queues['business_register.Fop']):
         self.bulk_manager.commit(Fop)
     # Attach the collected KVED rows to the Fops that were just committed.
     for fop in self.bulk_manager.queues['business_register.Fop']:
         for foptokved in self.new_fops_foptokveds.get(fop.code, ()):
             foptokved.fop = fop
             self.bulk_manager.add(foptokved)
     self.new_fops_foptokveds = {}
     # Same for the collected exchange-data rows.
     for fop in self.bulk_manager.queues['business_register.Fop']:
         for exchangedata in self.new_fops_exchange_data.get(fop.code, ()):
             exchangedata.fop = fop
             self.bulk_manager.add(exchangedata)
     self.new_fops_exchange_data = {}
     self.bulk_manager.queues['business_register.Fop'] = []
     if len(self.bulk_manager.queues['business_register.FopToKved']):
         self.bulk_manager.commit(FopToKved)
     if len(self.bulk_manager.queues['business_register.ExchangeDataFop']):
         self.bulk_manager.commit(ExchangeDataFop)
     self.bulk_manager.queues['business_register.FopToKved'] = []
     self.bulk_manager.queues['business_register.ExchangeDataFop'] = []
示例#4
0
    def save_to_db(self, records):
        """Queue a Company and its related rows for every record, then commit.

        Phase 1 (per record): resolve authority, status, bylaw and company
        type, build the Company via ``self.company_create`` and queue it for
        creation, then let the ``self.add_*`` helpers process the record's
        related data (branches, assignees, details, KVEDs, ...).

        Phase 2: commit the Company queues of ``self.bulk_manager``, point
        every queued branch and related row at its committed company (matched
        by ``hash_code``), and commit the related rows.

        Phase 3: commit the branch companies of ``self.branch_bulk_manager``,
        point the branch-level KVED / exchange-data / signer rows at them and
        commit those rows.

        Every queue is emptied once it has been committed.

        :param records: iterable of parsed record elements exposing ``xpath``.
            Every tag read below must be present, otherwise ``[0]`` raises
            ``IndexError``.
        :raises KeyError: when a queued row refers to a hash code that is in
            neither the update nor the create queue of its parent companies.
        """
        company_label = 'business_register.Company'

        def index_by_hash(manager):
            """Map hash_code -> company for a manager's two Company queues."""
            updated = {company.hash_code: company
                       for company in manager._update_queues[company_label]}
            created = {company.hash_code: company
                       for company in manager._create_queues[company_label]}
            return updated, created

        def pick_company(hash_code, updated, created):
            """Return the company for hash_code; the update queue wins."""
            if hash_code in updated:
                return updated[hash_code]
            return created[hash_code]

        def attach_company(queue, updated, created):
            """Set ``company`` on every queued row from its ``hash_code``."""
            for row in queue:
                row.company = pick_company(row.hash_code, updated, created)

        self.bylaw = None
        self.company_type = None
        for record in records:
            self.authority = self.save_or_get_authority(record.xpath('CURRENT_AUTHORITY')[0].text)
            self.status = self.save_or_get_status(record.xpath('STAN')[0].text)
            self.save_or_get_bylaw(record.xpath('STATUTE')[0].text)
            self.save_or_get_company_type(record.xpath('OPF')[0].text)

            edrpou = record.xpath('EDRPOU')[0].text or Company.INVALID
            # REGISTRATION is split into its first word (the date) and the
            # remainder (the info); empty results are normalised to None.
            registration_date = None
            registration_info = None
            registration = record.xpath('REGISTRATION')[0].text
            if registration:
                registration_date = format_date_to_yymmdd(
                    get_first_word(registration)) or None
                registration_info = cut_first_word(registration) or None
            # TODO: the update path (look the company up by hash code, refresh
            # its fields and queue it with add_update) is disabled; every
            # record is currently queued for creation.
            company = self.company_create(record, edrpou, registration_date, registration_info)
            self.bulk_manager.add_create(company)

            self.add_branches(record, edrpou)
            self.add_assignees(record, edrpou)
            self.add_company_detail(record, edrpou)
            self.add_company_to_kved(record.xpath(
                'ACTIVITY_KINDS')[0], record.xpath('NAME')[0].text, edrpou)
            self.add_bancruptcy_readjustment(record, edrpou)
            self.add_exchange_data(record.xpath('EXCHANGE_DATA')[0], record.xpath('NAME')[0].text, edrpou)
            self.add_founders(record, edrpou)
            self.add_company_to_predecessors(record, edrpou)
            self.add_signers(record, edrpou)
            self.add_termination_started(record, edrpou)

        # --- Phase 2: parent companies and the rows that point at them ---
        if len(self.bulk_manager._update_queues[company_label]) > 0:
            self.bulk_manager._commit_update(Company, ['name', 'short_name', 'company_type', 'edrpou'])
        self.bulk_manager._commit_create(Company)

        company_update_dict, company_create_dict = index_by_hash(self.bulk_manager)
        self.bulk_manager._update_queues[company_label] = []
        self.bulk_manager._create_queues[company_label] = []

        # A branch finds its parent through self.branch_to_parent, which maps
        # the branch hash code to the parent's hash code.
        for branch_queue in (self.branch_bulk_manager._create_queues[company_label],
                             self.branch_bulk_manager._update_queues[company_label]):
            for branch in branch_queue:
                branch.parent = pick_company(
                    self.branch_to_parent[branch.hash_code],
                    company_update_dict, company_create_dict)

        # The mapping is only needed for the batch that was just linked.
        self.branch_to_parent = {}

        # (model, queue label) pairs in commit order.
        child_models = (
            (Assignee, 'business_register.Assignee'),
            (FounderFull, 'business_register.FounderFull'),
            (BancruptcyReadjustment, 'business_register.BancruptcyReadjustment'),
            (CompanyDetail, 'business_register.CompanyDetail'),
            (CompanyToKved, 'business_register.CompanyToKved'),
            (ExchangeDataCompany, 'business_register.ExchangeDataCompany'),
            (CompanyToPredecessor, 'business_register.CompanyToPredecessor'),
            (Signer, 'business_register.Signer'),
            (TerminationStarted, 'business_register.TerminationStarted'),
        )
        for _, label in child_models:
            attach_company(self.bulk_manager._create_queues[label],
                           company_update_dict, company_create_dict)
        for model, _ in child_models:
            self.bulk_manager._commit_create(model)

        # --- Phase 3: branch companies and the rows that point at them ---
        if len(self.branch_bulk_manager._update_queues[company_label]) > 0:
            self.branch_bulk_manager._commit_update(Company, ['name', 'short_name'])
        self.branch_bulk_manager._commit_create(Company)

        # Index the branch companies before their queues are emptied below.
        company_update_dict, company_create_dict = index_by_hash(self.branch_bulk_manager)

        for _, label in child_models:
            self.bulk_manager._create_queues[label] = []
        self.branch_bulk_manager._update_queues[company_label] = []
        self.branch_bulk_manager._create_queues[company_label] = []

        branch_child_models = (
            (CompanyToKved, 'business_register.CompanyToKved'),
            (ExchangeDataCompany, 'business_register.ExchangeDataCompany'),
            (Signer, 'business_register.Signer'),
        )
        for _, label in branch_child_models:
            attach_company(self.branch_bulk_manager._create_queues[label],
                           company_update_dict, company_create_dict)
        for model, _ in branch_child_models:
            self.branch_bulk_manager._commit_create(model)
        for _, label in branch_child_models:
            self.branch_bulk_manager._create_queues[label] = []