Пример #1
0
 def __init__(self):
     """Set file settings, the record tag and empty caches, then run the parent initializer."""
     # Literal defaults: the record tag and two dictionaries that start empty.
     self.RECORD_TAG = 'SUBJECT'
     self.new_fops_foptokveds = dict()
     self.new_fops_exchange_data = dict()
     # Values read from the project settings.
     self.LOCAL_FOLDER = settings.LOCAL_FOLDER
     self.LOCAL_FILE_NAME = settings.LOCAL_FILE_NAME_FOP_FULL
     self.CHUNK_SIZE = settings.CHUNK_SIZE_FOP_FULL
     self.bulk_manager = BulkCreateManager()
     # The parent initializer runs last, after every attribute above is set.
     super().__init__()
Пример #2
0
 def __init__(self):
     """Resolve the dataset API address, set file settings and empty caches, then run the parent initializer."""
     # Literal defaults: the record tag and two dictionaries that start empty.
     self.RECORD_TAG = 'RECORD'
     self.new_fops_foptokveds = dict()
     self.new_fops_exchange_data = dict()
     # The API address comes from the Register row with this fixed source id.
     dataset_register = Register.objects.get(
         source_register_id="1c7f3815-3259-45e0-bdf1-64dca07ddc10")
     self.API_ADDRESS_FOR_DATASET = dataset_register.source_api_address
     # Values read from the project settings.
     self.LOCAL_FOLDER = settings.LOCAL_FOLDER
     self.LOCAL_FILE_NAME = settings.LOCAL_FILE_NAME_FOP
     self.CHUNK_SIZE = settings.CHUNK_SIZE_FOP
     self.bulk_manager = BulkCreateManager()
     # The parent initializer runs last, after every attribute above is set.
     super().__init__()
Пример #3
0
 def __init__(self):
     """Set file settings, bulk managers and lookup caches, then run the parent initializer."""
     # Literal defaults: the record tag and containers that start empty.
     self.RECORD_TAG = 'RECORD'
     self.all_companies_dict = dict()
     self.branch_to_parent = dict()
     self.all_company_founders = list()
     # Values read from the project settings.
     self.LOCAL_FILE_NAME = settings.LOCAL_FILE_NAME_UO
     self.LOCAL_FOLDER = settings.LOCAL_FOLDER
     self.CHUNK_SIZE = settings.CHUNK_SIZE_UO
     # Two separate bulk managers: one general, one for branches.
     self.bulk_manager = BulkCreateManager()
     self.branch_bulk_manager = BulkCreateManager()
     # Lookup dictionaries built by put_objects_to_dict (Bylaw first, then Predecessor).
     app_label = "business_register"
     self.all_bylaw_dict = self.put_objects_to_dict("name", app_label, "Bylaw")
     self.all_predecessors_dict = self.put_objects_to_dict(
         "name", app_label, "Predecessor")
     # The parent initializer runs last, after every attribute above is set.
     super().__init__()
Пример #4
0
 def __init__(self):
     """Resolve the dataset API address, set file settings and load the location lookup dictionaries."""
     self.RECORD_TAG = 'RECORD'
     # The API address comes from the Register row named in the settings.
     ratu_register = Register.objects.get(
         source_register_id=settings.LOCATION_RATU_SOURCE_REGISTER_ID)
     self.API_ADDRESS_FOR_DATASET = ratu_register.source_api_address
     # Values read from the project settings.
     self.LOCAL_FOLDER = settings.LOCAL_FOLDER
     self.LOCAL_FILE_NAME = settings.LOCAL_FILE_NAME_RATU
     self.CHUNK_SIZE = settings.CHUNK_SIZE_RATU
     self.bulk_manager = BulkCreateManager()
     # (attribute name, key field, model name); loaded in exactly this order.
     # The "outdated_*" dictionaries are built by separate calls with the same
     # arguments as their "all_*" counterparts.
     lookup_specs = (
         ('all_regions_dict', 'name', 'RatuRegion'),
         ('all_districts_dict', 'code', 'RatuDistrict'),
         ('all_cities_dict', 'code', 'RatuCity'),
         ('all_citydistricts_dict', 'code', 'RatuCityDistrict'),
         ('all_streets_dict', 'code', 'RatuStreet'),
         ('outdated_districts_dict', 'code', 'RatuDistrict'),
         ('outdated_cities_dict', 'code', 'RatuCity'),
         ('outdated_citydistricts_dict', 'code', 'RatuCityDistrict'),
         ('outdated_streets_dict', 'code', 'RatuStreet'),
     )
     for attribute, key_field, model_name in lookup_specs:
         loaded = self.put_objects_to_dict(
             key_field, 'location_register', model_name)
         setattr(self, attribute, loaded)
     # The parent initializer runs last, after every attribute above is set.
     super().__init__()
Пример #5
0
class RfopConverter(Converter):
    """Converter that stores RFOP records in the ``Rfop`` and ``State`` tables.

    The class body itself queries ``State`` and ``Kved`` and prints a usage
    hint, so those side effects happen when the class is defined.
    """
    LOCAL_FILE_NAME = "fop.xml"
    DATASET_ID = "1c7f3815-3259-45e0-bdf1-64dca07ddc10"
    CHUNK_SIZE = 200

    def rename_file(self, file):
        """Map a file name to 'fop.xml' or 'uo.xml' by a case-insensitive substring check.

        'FOP' takes precedence over 'UO'; a name containing neither is returned as is.
        """
        upper_name = file.upper()
        if 'FOP' in upper_name:
            return 'fop.xml'
        if 'UO' in upper_name:
            return 'uo.xml'
        return file

    # list of models for clearing DB
    tables = [Rfop]

    # format record's data
    record = {'RECORD': '', 'FIO': '', 'ADDRESS': '', 'KVED': '', 'STAN': ''}

    # lookup dictionaries for objects already written to the DB;
    # the values are whole model instances
    state_dict = {}
    kved_dict = {}

    bulk_manager = BulkCreateManager(CHUNK_SIZE)

    # fill the lookup dictionaries from the DB at class-definition time
    for state in State.objects.all():
        state_dict[state.name] = state
    for kved in Kved.objects.all():
        kved_dict[kved.code] = kved

    def save_to_db(self, record):
        """Resolve the record's state and kved, then queue an Rfop row for it."""
        state = self.save_to_state_table(record)
        kved = self.get_kved_from_DB(record, 'FIO')
        self.save_to_rfop_table(record, state, kved)
        print('saved')

    def save_to_state_table(self, record):
        """Return the State for ``record['STAN']``, creating and caching it if unseen.

        An empty 'STAN' value falls back to ``State.EMPTY_FIELD``.
        """
        state_name = record['STAN'] or State.EMPTY_FIELD
        if state_name in self.state_dict:
            return self.state_dict[state_name]
        new_state = State(name=state_name)
        new_state.save()
        self.state_dict[state_name] = new_state
        return new_state

    def save_to_rfop_table(self, record, state, kved):
        """Build an Rfop from the record and hand it to the bulk manager."""
        rfop_fields = dict(state=state,
                           kved=kved,
                           fullname=record['FIO'],
                           address=record['ADDRESS'])
        self.bulk_manager.add(Rfop(**rfop_fields))

    # usage hint, printed once when the class body is executed
    print(
        'Rfop_class already imported. For start rewriting RFOP to the DB run > RfopConverter().process()\n',
        'For clear RFOP tables run > RfopConverter().clear_db()')
Пример #6
0
class UkrCompanyConverter(CompanyConverter):
    def __init__(self):
        """Set file settings, bulk managers and lookup caches, then run the parent initializer."""
        # Literal defaults: the record tag and containers that start empty.
        self.RECORD_TAG = 'RECORD'
        self.all_companies_dict = dict()
        self.branch_to_parent = dict()
        self.all_company_founders = list()
        # Values read from the project settings.
        self.LOCAL_FILE_NAME = settings.LOCAL_FILE_NAME_UO
        self.LOCAL_FOLDER = settings.LOCAL_FOLDER
        self.CHUNK_SIZE = settings.CHUNK_SIZE_UO
        # Two separate bulk managers: one general, one for branches.
        self.bulk_manager = BulkCreateManager()
        self.branch_bulk_manager = BulkCreateManager()
        # Lookup dictionaries built by put_objects_to_dict (Bylaw first, then Predecessor).
        app_label = "business_register"
        self.all_bylaw_dict = self.put_objects_to_dict(
            "name", app_label, "Bylaw")
        self.all_predecessors_dict = self.put_objects_to_dict(
            "name", app_label, "Predecessor")
        # The parent initializer runs last, after every attribute above is set.
        super().__init__()

    def save_or_get_bylaw(self, bylaw_from_record):
        """Return the Bylaw cached under this name, creating and caching it on a miss."""
        known_bylaws = self.all_bylaw_dict
        if bylaw_from_record in known_bylaws:
            return known_bylaws[bylaw_from_record]
        # First time this name is seen: persist it and remember the new object.
        created_bylaw = Bylaw.objects.create(name=bylaw_from_record)
        known_bylaws[bylaw_from_record] = created_bylaw
        return created_bylaw

    def save_or_get_predecessor(self, item):
        """Return the Predecessor for ``item``, creating and caching it if unseen.

        ``item`` must expose ``xpath('NAME')`` and ``xpath('CODE')``, each
        returning a list whose first element has a ``.text`` attribute.

        Predecessors are created with a lower-cased name, so the cache is probed
        with the lower-cased name first. Probing only with the raw text (as
        before) missed every entry whose key is the stored lower-cased name and
        created a duplicate row.
        NOTE(review): assumes ``all_predecessors_dict`` is keyed by the stored
        ``name`` value - confirm against ``put_objects_to_dict``.

        Raises:
            AttributeError: if the NAME text is ``None`` (unchanged behaviour).
        """
        raw_name = item.xpath('NAME')[0].text
        name = raw_name.lower()
        cache = self.all_predecessors_dict
        # The raw text stays as a fallback key for entries cached by older code.
        for key in (name, raw_name):
            if key in cache:
                return cache[key]
        new_predecessor = Predecessor.objects.create(
            name=name,
            code=item.xpath('CODE')[0].text)
        cache[name] = new_predecessor
        return new_predecessor

    def extract_detail_founder_data(self, founder_info):
        """Split a detailed founder record into name, EDRPOU, address and equity.

        The record is a comma-separated string: the first field is the name and
        the second is normally the EDRPOU code. If the second field is not a
        code, every field is checked, which covers names that contain commas.

        Args:
            founder_info: raw founder text from the record.

        Returns:
            Tuple ``(name, edrpou, address, equity)``. ``edrpou`` and ``equity``
            are ``None`` when not found. ``address`` is what remains of the
            record after removing the name, code and equity fragment; it becomes
            ``None`` when that remainder is non-empty but shorter than 15
            characters.
        """
        # split on commas and drop the surrounding spaces of every field
        parts = [part.strip() for part in founder_info.split(',')]
        name = parts[0]
        # a record without commas has no second field, so guard the index
        edrpou = None
        if len(parts) > 1 and self.find_edrpou(parts[1]):
            edrpou = parts[1]
        # the code may sit in another field when the name itself contains commas
        if not edrpou:
            for part in parts:
                if self.find_edrpou(part):
                    edrpou = part
                    # everything before the code is the name, commas included
                    name = founder_info.split(part)[0]
                    logger.warning(f'Нестандартний запис: {founder_info}')
                    break
        equity = None
        element_with_equity = None
        # usually the equity is at the end of the record
        for part in parts:
            if part.startswith('розмір внеску до статутного фонду'
                               ) and part.endswith('грн.'):
                element_with_equity = part
                # equity stays None when the fragment has no decimal amount
                amounts = re.findall(r'\d+\.\d+', part)
                if amounts:
                    equity = float(amounts[0])
                break
        # remove everything except the address
        address = founder_info.replace(name, '')
        if edrpou:
            address = address.replace(edrpou, '')
        if element_with_equity:
            address = address.replace(element_with_equity, '')
        if address and len(address) < 15:
            address = None
        if address and len(address) > 200:
            logger.warning(
                f'Завелика адреса: {address} із запису: {founder_info}')
        return name, edrpou, address, equity

    def extract_founder_data(self, founder_info):
        """Split a founder record into name, beneficiary flag, address and equity.

        The record is split at the first comma only, because the equity value
        that may follow the name contains a comma itself.

        Args:
            founder_info: raw founder text from the record.

        Returns:
            Tuple ``(name, is_beneficiary, address, equity)``.
            ``is_beneficiary`` is True when the name starts with
            'КІНЦЕВИЙ БЕНЕФІЦІАРНИЙ ВЛАСНИК'. When the text after the name
            starts with 'розмір частки', ``equity`` is the first
            comma-decimal number in it as a float, or ``None`` if there is
            none; otherwise that text is returned as ``address``. A record
            without a comma gives ``None`` for both address and equity.
        """
        name, separator, second_part = founder_info.partition(',')
        name = name.strip()
        second_part = second_part.strip()
        is_beneficiary = name.startswith('КІНЦЕВИЙ БЕНЕФІЦІАРНИЙ ВЛАСНИК')
        equity = None
        address = None
        if not separator:
            # nothing follows the name, so there is no address or equity to read
            return name, is_beneficiary, address, equity
        if second_part.startswith('розмір частки'):
            # the amount uses a decimal comma, e.g. "1000,50"
            amounts = re.findall(r'\d+,\d+', second_part)
            if amounts:
                equity = float(amounts[0].replace(',', '.'))
        else:
            address = second_part
        return name, is_beneficiary, address, equity

    def save_or_update_founders(self, founders_from_record, company):
        """Sync the company's stored founders with the founders listed in a record.

        Each usable entry is matched by lower-cased name against the founders
        already stored for ``company``: a match is updated where values differ,
        a miss is created. Stored founders left unmatched are soft-deleted.
        """
        unmatched_founders = list(Founder.objects.filter(company=company))
        for node in founders_from_record:
            info = node.text
            # skip empty entries and entries that end with 'ВІДСУТНІЙ'
            if not info or info.endswith('ВІДСУТНІЙ'):
                continue
            if ',' in info:
                # a comma means there is more than just a name
                name, is_beneficiary, address, equity = self.extract_founder_data(
                    info)
                name = name.lower()
            else:
                name, is_beneficiary = info.lower(), False
                address = equity = None
            # the first stored founder with the same name is the match
            match = next(
                (stored for stored in unmatched_founders if stored.name == name),
                None)
            if match is None:
                Founder.objects.create(company=company,
                                       info=info,
                                       name=name,
                                       address=address,
                                       equity=equity,
                                       is_beneficiary=is_beneficiary,
                                       is_founder=True)
                continue
            # (field, new value, may overwrite): address and equity are only
            # written when the record actually provides a value
            candidates = (
                ('info', info, True),
                ('is_beneficiary', is_beneficiary, True),
                ('address', address, bool(address)),
                ('equity', equity, bool(equity)),
            )
            changed_fields = []
            for field, value, may_overwrite in candidates:
                if may_overwrite and getattr(match, field) != value:
                    setattr(match, field, value)
                    changed_fields.append(field)
            if changed_fields:
                changed_fields.append('updated_at')
                match.save(update_fields=changed_fields)
            # matched founders must not be soft-deleted below
            unmatched_founders.remove(match)
        for outdated_founder in unmatched_founders:
            outdated_founder.soft_delete()

    def extract_beneficiary_data(self, beneficiary_info):
        """Split a beneficiary record into name, EDRPOU and address.

        The record is split at the first comma. The field right after the name
        is taken as the EDRPOU when ``self.find_edrpou`` accepts it, and is
        then removed from the address.

        Args:
            beneficiary_info: raw beneficiary text from the record.

        Returns:
            Tuple ``(name, edrpou, address)``. ``edrpou`` is ``None`` when the
            field after the name is not a code. A record without a comma gives
            ``(name, None, None)`` instead of raising IndexError.
        """
        name, separator, rest = beneficiary_info.partition(',')
        name = name.strip()
        rest = rest.strip()
        if not separator:
            # nothing follows the name, so there is no code or address to read
            return name, None, None
        next_word_after_name = rest.split(',', 1)[0]
        edrpou = next_word_after_name if self.find_edrpou(
            next_word_after_name) else None
        address = rest.replace(edrpou, '') if edrpou else rest
        return name, edrpou, address

    def save_or_update_beneficiaries(self, beneficiares_from_record, company):
        """Sync the company's stored founders with the beneficiaries listed in a record.

        Each entry is matched by lower-cased name against the founders already
        stored for ``company``: a match is flagged as beneficiary and updated
        where values differ, a miss is created. Stored founders left unmatched
        are soft-deleted.
        """
        unmatched_founders = list(Founder.objects.filter(company=company))
        for node in beneficiares_from_record:
            info = node.text
            name, edrpou, address = self.extract_beneficiary_data(info)
            name = name.lower()
            # the first stored founder with the same name is the match
            match = next(
                (stored for stored in unmatched_founders if stored.name == name),
                None)
            if match is None:
                Founder.objects.create(company=company,
                                       info=info,
                                       name=name,
                                       edrpou=edrpou,
                                       address=address,
                                       is_beneficiary=True)
                continue
            changed_fields = []
            if not match.is_beneficiary:
                match.is_beneficiary = True
                changed_fields.append('is_beneficiary')
            # edrpou and address are only written when the record provides a value
            for field, value in (('edrpou', edrpou), ('address', address)):
                if value and getattr(match, field) != value:
                    setattr(match, field, value)
                    changed_fields.append(field)
            if changed_fields:
                changed_fields.append('updated_at')
                match.save(update_fields=changed_fields)
            # matched founders must not be soft-deleted below
            unmatched_founders.remove(match)
        for outdated_founder in unmatched_founders:
            outdated_founder.soft_delete()

    def branch_create(self, item, code):
        """Build an unsaved Company for a branch element and return it.

        Name, address, creation date and contacts come from ``item``; authority,
        bylaw, company type and status are copied from attributes of ``self``.
        """
        def first_text(tag):
            # text of the first node found for ``tag``
            return item.xpath(tag)[0].text

        branch = Company()
        branch.name = first_text('NAME')
        branch.short_name = code
        branch.address = first_text('ADDRESS')
        created_on = first_text('CREATE_DATE')
        if created_on:
            branch.registration_date = format_date_to_yymmdd(created_on) or None
        branch.contact_info = first_text('CONTACTS')
        branch.authority = self.authority
        branch.bylaw = self.bylaw
        branch.company_type = self.company_type
        branch.status = self.status
        branch.hash_code = self.create_hash_code(branch.name, code)
        return branch

    def add_company_detail(self, founding_document_number, executive_power,
                           superior_management, managing_paper,
                           terminated_info, termination_cancel_info, vp_dates,
                           code):
        """Build a CompanyDetail from the given values and add it to the bulk manager."""
        detail = CompanyDetail()
        # (attribute, value) pairs, assigned in this order; ``code`` is stored as hash_code
        assignments = (
            ('founding_document_number', founding_document_number),
            ('executive_power', executive_power),
            ('superior_management', superior_management),
            ('managing_paper', managing_paper),
            ('terminated_info', terminated_info),
            ('termination_cancel_info', termination_cancel_info),
            ('vp_dates', vp_dates),
            ('hash_code', code),
        )
        for attribute, value in assignments:
            setattr(detail, attribute, value)
        self.bulk_manager.add(detail)

    def add_assignees(self, assignees_from_record, code):
        """Add one Assignee per element to the bulk manager.

        The name is lower-cased, the EDRPOU is the element's CODE text and
        ``code`` is stored as the hash code.
        """
        for node in assignees_from_record:
            lowered_name = node.xpath('NAME')[0].text.lower()
            assignee_code = node.xpath('CODE')[0].text
            assignee = Assignee()
            assignee.name = lowered_name
            assignee.edrpou = assignee_code
            assignee.hash_code = code
            self.bulk_manager.add(assignee)

    def add_bancruptcy_readjustment(self, record, code):
        """Add the record's bankruptcy readjustment data to the bulk manager.

        Nothing is added when the record has no
        BANKRUPTCY_READJUSTMENT_INFO/OP_DATE node.
        """
        readjustment = BancruptcyReadjustment()
        op_date_nodes = record.xpath('BANKRUPTCY_READJUSTMENT_INFO/OP_DATE')
        if not op_date_nodes:
            return
        readjustment.op_date = format_date_to_yymmdd(
            op_date_nodes[0].text) or None
        reason_text = record.xpath(
            'BANKRUPTCY_READJUSTMENT_INFO/REASON')[0].text
        readjustment.reason = reason_text.lower()
        state_text = record.xpath(
            'BANKRUPTCY_READJUSTMENT_INFO/SBJ_STATE')[0].text
        readjustment.sbj_state = state_text.lower()
        head_name = record.xpath(
            'BANKRUPTCY_READJUSTMENT_INFO/BANKRUPTCY_READJUSTMENT_HEAD_NAME'
        )[0].text
        # the head name is only set when the node has text
        if head_name:
            readjustment.head_name = head_name
        readjustment.hash_code = code
        self.bulk_manager.add(readjustment)

    def add_company_to_kved(self, kveds_from_record, code):
        """Add one CompanyToKved per named activity element to the bulk manager.

        Elements without a NAME node, or with an empty NAME text, are skipped.
        ``primary_kved`` is True when the PRIMARY text equals "так".
        """
        for node in kveds_from_record:
            name_nodes = node.xpath('NAME')
            kved_name = name_nodes[0].text if name_nodes else None
            if not kved_name:
                continue
            link = CompanyToKved()
            link.kved = self.get_kved_from_DB(kved_name)
            primary_text = node.xpath('PRIMARY')[0].text
            link.primary_kved = primary_text == "так"
            link.hash_code = code
            self.bulk_manager.add(link)

    def add_company_to_kved_branch(self, kveds_from_record, code):
        """Add one CompanyToKved per named activity element to the branch bulk manager.

        Elements without a NAME node, or with an empty NAME text, are skipped.
        ``primary_kved`` is True when the PRIMARY text equals "так".
        """
        for node in kveds_from_record:
            name_nodes = node.xpath('NAME')
            kved_name = name_nodes[0].text if name_nodes else None
            if not kved_name:
                continue
            link = CompanyToKved()
            link.kved = self.get_kved_from_DB(kved_name)
            primary_text = node.xpath('PRIMARY')[0].text
            link.primary_kved = primary_text == "так"
            link.hash_code = code
            # these rows go to the branch manager, not the main one
            self.branch_bulk_manager.add(link)

    def add_exchange_data(self, exchange_data, code):
        """Add one ExchangeDataCompany per element that has an AUTHORITY_NAME node.

        The taxpayer type and the start/end dates are only set when their
        nodes have text; ``code`` is stored as the hash code.
        """
        for node in exchange_data:
            authority_nodes = node.xpath('AUTHORITY_NAME')
            if not authority_nodes:
                continue
            answer = ExchangeDataCompany()
            answer.authority = self.save_or_get_authority(
                authority_nodes[0].text)
            taxpayer_type = node.xpath('TAX_PAYER_TYPE')[0].text
            if taxpayer_type:
                answer.taxpayer_type = self.save_or_get_taxpayer_type(
                    taxpayer_type)
            start_date = node.xpath('START_DATE')[0].text
            if start_date:
                answer.start_date = format_date_to_yymmdd(start_date) or None
            answer.start_number = node.xpath('START_NUM')[0].text
            end_date = node.xpath('END_DATE')[0].text
            if end_date:
                answer.end_date = format_date_to_yymmdd(end_date) or None
            answer.end_number = node.xpath('END_NUM')[0].text
            answer.hash_code = code
            self.bulk_manager.add(answer)

    def add_exchange_data_branch(self, exchange_data, name, code):
        """Add a branch's exchange data to the branch bulk manager.

        Only elements with an AUTHORITY_NAME node are added. An empty
        TAX_PAYER_TYPE text falls back to ``Company.INVALID``. The hash code is
        built from ``name`` and ``code`` with ``self.create_hash_code``.
        """
        if len(exchange_data) == 0:
            return
        for node in exchange_data:
            answer = ExchangeDataCompany()
            authority_nodes = node.xpath('AUTHORITY_NAME')
            if not authority_nodes:
                continue
            answer.authority = self.save_or_get_authority(
                authority_nodes[0].text)
            tax_payer_type = node.xpath(
                'TAX_PAYER_TYPE')[0].text or Company.INVALID
            answer.taxpayer_type = self.save_or_get_taxpayer_type(
                tax_payer_type)
            start_date = node.xpath('START_DATE')[0].text
            if start_date:
                answer.start_date = format_date_to_yymmdd(start_date) or None
            answer.start_number = node.xpath('START_NUM')[0].text
            end_date = node.xpath('END_DATE')[0].text
            if end_date:
                answer.end_date = format_date_to_yymmdd(end_date) or None
            answer.end_number = node.xpath('END_NUM')[0].text
            answer.hash_code = self.create_hash_code(name, code)
            self.branch_bulk_manager.add(answer)

    def add_company_to_predecessors(self, predecessors_from_record, code):
        """Add one CompanyToPredecessor per element that has a NAME node."""
        for node in predecessors_from_record:
            if not node.xpath('NAME'):
                continue
            link = CompanyToPredecessor()
            link.predecessor = self.save_or_get_predecessor(node)
            link.hash_code = code
            self.bulk_manager.add(link)

    def add_signers(self, signers_from_record, code):
        """Add one Signer per element, named by the element's lower-cased text."""
        for node in signers_from_record:
            lowered_name = node.text.lower()
            signer = Signer()
            signer.name = lowered_name
            signer.hash_code = code
            self.bulk_manager.add(signer)

    def add_termination_started(self, record, code):
        """Add the record's termination-started data to the bulk manager.

        Nothing is added when the record has no
        TERMINATION_STARTED_INFO/OP_DATE node.
        """
        op_date_nodes = record.xpath('TERMINATION_STARTED_INFO/OP_DATE')
        if not op_date_nodes:
            return
        termination = TerminationStarted()
        op_date_text = op_date_nodes[0].text
        if op_date_text:
            termination.op_date = format_date_to_yymmdd(op_date_text) or None
        reason_text = record.xpath('TERMINATION_STARTED_INFO/REASON')[0].text
        termination.reason = reason_text.lower()
        state_text = record.xpath(
            'TERMINATION_STARTED_INFO/SBJ_STATE')[0].text
        termination.sbj_state = state_text.lower()
        signer_name = record.xpath(
            'TERMINATION_STARTED_INFO/SIGNER_NAME')[0].text
        # the signer name is only set when the node has text
        if signer_name:
            termination.signer_name = signer_name.lower()
        creditor_end_text = record.xpath(
            'TERMINATION_STARTED_INFO/CREDITOR_REQ_END_DATE')[0].text
        if creditor_end_text:
            # a falsy formatted date falls back to the literal '01.01.1990'
            termination.creditor_reg_end_date = format_date_to_yymmdd(
                creditor_end_text) or '01.01.1990'
        termination.hash_code = code
        self.bulk_manager.add(termination)

    def add_branches(self, record, edrpou):
        """Run the 'EMP' authority, bylaw, company-type and status lookups once per branch.

        The return values of the lookups are discarded, and neither the branch
        code nor ``edrpou`` is used by this method.
        """
        placeholder = 'EMP'
        branch_nodes = record.xpath('BRANCHES')[0]
        for branch_node in branch_nodes:
            # still evaluated although unused, so a branch without a CODE node
            # fails here exactly as before
            _branch_code = branch_node.xpath('CODE')[0].text or Company.INVALID
            self.save_or_get_authority(placeholder)
            self.save_or_get_bylaw(placeholder)
            self.save_or_get_company_type(placeholder, 'uk')
            self.save_or_get_status(placeholder)

    # try:
    #     branch = Company.objects.filter(
    #         hash_code=self.create_hash_code(item.xpath('NAME')[0].text, code)).first()
    # except:
    #     pass
    # if branch:
    #     branch.address = item.xpath('ADDRESS')[0].text
    #     if item.xpath('CREATE_DATE')[0].text:
    #         branch.registration_date = format_date_to_yymmdd(
    #             item.xpath('CREATE_DATE')[0].text) or None
    #     branch.contact_info = item.xpath('CONTACTS')[0].text
    #     self.branch_bulk_manager.add_update(branch)
    #     print('update')
    # else:
    #     branch = self.branch_create(item, code)
    #     self.branch_bulk_manager.add_create(branch)
    #     print('create')
    # branch = self.branch_create(item, code)
    # self.branch_bulk_manager.add_create(branch)
    # branch_kveds = item.xpath('ACTIVITY_KINDS')[0]
    # if len(branch_kveds):
    #     self.add_company_to_kved_branch(branch_kveds, item.xpath('NAME')[0].text, code)
    # self.add_exchange_data_branch(
    #     item.xpath('EXCHANGE_DATA')[0],
    #     item.xpath('NAME')[0].text, code
    # )
    # if item.xpath('SIGNER'):
    #     signer = Signer(
    #         name=item.xpath('SIGNER')[0].text,
    #         hash_code=self.create_hash_code(item.xpath('NAME')[0].text, code)
    #     )
    #     self.branch_bulk_manager.add_create(signer)
    # self.branch_to_parent[
    #     self.create_hash_code(item.xpath('NAME')[0].text, code)
    # ] = self.create_hash_code(record.xpath('NAME')[0].text, edrpou)

    def save_detail_company_to_db(self, records):
        """Create or update a Company for every record and sync its founders.

        Each record's fields are read with ``record.xpath(...)``. A record with
        no EDRPOU is counted in ``self.report.invalid_data`` and skipped. The
        company is looked up by ``code`` (lower-cased name + EDRPOU): if none
        exists a new one is saved, otherwise only the fields that differ are
        saved. Founders are synced when the FOUNDERS element has children.
        After the loop any queued Founder objects are committed and the queue
        is reset.

        Args:
            records: iterable of record elements exposing ``xpath``.
        """
        for record in records:
            name = record.xpath('NAME')[0].text.lower()
            short_name = record.xpath('SHORT_NAME')[0].text
            if short_name:
                short_name = short_name.lower()
            company_type = record.xpath('OPF')[0].text
            if company_type:
                company_type = self.save_or_get_company_type(
                    company_type, 'uk')
            edrpou = record.xpath('EDRPOU')[0].text
            if not edrpou:
                self.report.invalid_data += 1
                continue
            code = name + edrpou
            address = record.xpath('ADDRESS')[0].text
            status = self.save_or_get_status(record.xpath('STAN')[0].text)
            # The detail values below (founding_document_number, executive_power,
            # superior_management, managing_paper, terminated_info,
            # termination_cancel_info, vp_dates) are only used by the disabled
            # add_company_detail call. They are still read so that a record
            # missing one of these tags fails the same way as before.
            founding_document_number = record.xpath(
                'FOUNDING_DOCUMENT_NUM')[0].text
            executive_power = record.xpath('EXECUTIVE_POWER')[0].text
            if executive_power:
                executive_power = executive_power.lower()
            # if len(record.xpath('ACTIVITY_KINDS')[0]):
            #     self.add_company_to_kved(record.xpath('ACTIVITY_KINDS')[0], code)
            superior_management = record.xpath('SUPERIOR_MANAGEMENT')[0].text
            if superior_management:
                superior_management = superior_management.lower()
            # if len(record.xpath('SIGNERS')[0]):
            #     self.add_signers(record.xpath('SIGNERS')[0], code)
            authorized_capital = record.xpath('AUTHORIZED_CAPITAL')[0].text
            if authorized_capital:
                # the source uses a decimal comma
                authorized_capital = authorized_capital.replace(',', '.')
                authorized_capital = float(authorized_capital)
            bylaw = self.save_or_get_bylaw(record.xpath('STATUTE')[0].text)
            registration_date = None
            registration_info = None
            registration = record.xpath('REGISTRATION')[0].text
            if registration:
                # the first word is the date, the rest is free-form info
                registration_date = format_date_to_yymmdd(
                    get_first_word(registration))
                registration_info = cut_first_word(registration)
            managing_paper = record.xpath('MANAGING_PAPER')[0].text
            if managing_paper:
                managing_paper = managing_paper.lower()
            # TODO: refactor branches storing
            # if len(record.xpath('BRANCHES')[0]):
            #     self.add_branches(record.xpath('BRANCHES')[0], code)
            # if record.xpath('TERMINATION_STARTED_INFO/OP_DATE'):
            #     self.add_termination_started(record, code)
            # if record.xpath('BANKRUPTCY_READJUSTMENT_INFO/OP_DATE'):
            #     self.add_bancruptcy_readjustment(record, code)
            # if len(record.xpath('PREDECESSORS')[0]):
            #     self.add_company_to_predecessors(record.xpath('PREDECESSORS')[0], code)
            # if len(record.xpath('ASSIGNEES')[0]):
            #     self.add_assignees(record.xpath('ASSIGNEES')[0], code)
            terminated_info = record.xpath('TERMINATED_INFO')[0].text
            if terminated_info:
                terminated_info = terminated_info.lower()
            termination_cancel_info = record.xpath(
                'TERMINATION_CANCEL_INFO')[0].text
            if termination_cancel_info:
                termination_cancel_info = termination_cancel_info.lower()
            contact_info = record.xpath('CONTACTS')[0].text
            # if record.xpath('EXCHANGE_DATA')[0]:
            #     self.add_exchange_data(record.xpath('EXCHANGE_DATA')[0], code)
            vp_dates = record.xpath('VP_DATES')[0].text
            authority = self.save_or_get_authority(
                record.xpath('CURRENT_AUTHORITY')[0].text)
            # self.add_company_detail(founding_document_number, executive_power, superior_management, managing_paper,
            #                         terminated_info, termination_cancel_info, vp_dates, code)
            # TODO: resolve the problem of records with the same company name and edrpou
            # that results in the same code
            company = Company.objects.filter(code=code).first()
            if not company:
                # ``address`` was previously left out here, so a new company was
                # saved without the address that the update branch keeps in sync.
                company = Company(name=name,
                                  short_name=short_name,
                                  company_type=company_type,
                                  edrpou=edrpou,
                                  authorized_capital=authorized_capital,
                                  address=address,
                                  status=status,
                                  bylaw=bylaw,
                                  registration_date=registration_date,
                                  registration_info=registration_info,
                                  contact_info=contact_info,
                                  authority=authority,
                                  code=code)
                company.save()
                # self.bulk_manager.add_create(company)
            else:
                # collect only the fields that differ, so save() writes the minimum
                update_fields = []
                if company.name != name:
                    company.name = name
                    update_fields.append('name')
                if company.short_name != short_name:
                    company.short_name = short_name
                    update_fields.append('short_name')
                if company.company_type != company_type:
                    company.company_type = company_type
                    update_fields.append('company_type')
                if company.authorized_capital != authorized_capital:
                    company.authorized_capital = authorized_capital
                    update_fields.append('authorized_capital')
                if company.address != address:
                    company.address = address
                    update_fields.append('address')
                if company.status != status:
                    company.status = status
                    update_fields.append('status')
                if company.bylaw != bylaw:
                    company.bylaw = bylaw
                    update_fields.append('bylaw')
                if to_lower_string_if_exists(
                        company.registration_date) != registration_date:
                    company.registration_date = registration_date
                    update_fields.append('registration_date')
                if company.registration_info != registration_info:
                    company.registration_info = registration_info
                    update_fields.append('registration_info')
                if company.contact_info != contact_info:
                    company.contact_info = contact_info
                    update_fields.append('contact_info')
                if company.authority != authority:
                    company.authority = authority
                    update_fields.append('authority')
                if update_fields:
                    update_fields.append('updated_at')
                    company.save(update_fields=update_fields)
                    # self.bulk_manager.add_update(company)
            if len(record.xpath('FOUNDERS')[0]):
                self.save_or_update_founders(
                    record.xpath('FOUNDERS')[0], company)
        # if len(self.bulk_manager.update_queues['business_register.Company']):
        #     self.bulk_manager.commit_update(Company, ['name', 'short_name', 'company_type',
        #                                               'authorized_capital', 'address', 'status',
        #                                               'bylaw', 'registration_date',
        #                                               'registration_info', 'contact_info',
        #                                               'authority'])
        # if len(self.bulk_manager.create_queues['business_register.Company']):
        #     self.bulk_manager.commit_create(Company)
        # commit any queued founders, then reset the queue
        if len(self.bulk_manager.queues['business_register.Founder']):
            self.bulk_manager.commit(Founder)
        self.bulk_manager.queues['business_register.Founder'] = []

        # for company in self.bulk_manager.create_queues['business_register.Company']:
        #     self.all_companies_dict[company.company_code] = company
        # self.bulk_manager.update_queues['business_register.Company'] = []
        # self.bulk_manager.create_queues['business_register.Company'] = []

        # for branch in self.branch_bulk_manager._create_queues['business_register.Company']:
        #     if self.branch_to_parent[branch.hash_code] in company_update_dict:
        #         branch.parent = company_update_dict[self.branch_to_parent[branch.hash_code]]
        #     else:
        #         branch.parent = company_create_dict[self.branch_to_parent[branch.hash_code]]
        #
        # for branch in self.branch_bulk_manager._update_queues['business_register.Company']:
        #     if self.branch_to_parent[branch.hash_code] in company_update_dict:
        #         branch.parent = company_update_dict[self.branch_to_parent[branch.hash_code]]
        #     else:
        #         branch.parent = company_create_dict[self.branch_to_parent[branch.hash_code]]
        #
        # branch_to_parent = {}

        # for assignee in self.bulk_manager.create_queues['business_register.Assignee']:
        #     assignee.company = self.all_companies_dict[assignee.company_code]
        #
        # for company_to_kved in self.bulk_manager.create_queues['business_register.CompanyToKved']:
        #     company_to_kved.company = self.all_companies_dict[company_to_kved.company_code]
        #
        # for exchange_data in \
        #         self.bulk_manager.create_queues['business_register.ExchangeDataCompany']:
        #     exchange_data.company = self.all_companies_dict[exchange_data.company_code]
        #
        # for founder in self.bulk_manager.create_queues['business_register.FounderFull']:
        #     founder.company = self.all_companies_dict[founder.company_code]
        #
        # for bancruptcy_readjustment in \
        #         self.bulk_manager.create_queues['business_register.BancruptcyReadjustment']:
        #     bancruptcy_readjustment.company = \
        #         self.all_companies_dict[bancruptcy_readjustment.company_code]
        #
        # for company_detail in self.bulk_manager.create_queues['business_register.CompanyDetail']:
        #     company_detail.company = self.all_companies_dict[company_detail.company_code]
        #
        # for company_to_predecessor in \
        #         self.bulk_manager.create_queues['business_register.CompanyToPredecessor']:
        #     company_to_predecessor.company = \
        #         self.all_companies_dict[company_to_predecessor.company_code]
        #
        # for signer in self.bulk_manager.create_queues['business_register.Signer']:
        #     signer.company = self.all_companies_dict[signer.company_code]
        #
        # for termination_started in \
        #         self.bulk_manager.create_queues['business_register.TerminationStarted']:
        #     termination_started.company = self.all_companies_dict[termination_started.company_code]
        #
        # self.bulk_manager.commit_create(Assignee)
        # self.bulk_manager.commit_create(BancruptcyReadjustment)
        # self.bulk_manager.commit_create(CompanyDetail)
        # self.bulk_manager.commit_create(CompanyToKved)
        # self.bulk_manager.commit_create(ExchangeDataCompany)
        # self.bulk_manager.commit_create(CompanyToPredecessor)
        # self.bulk_manager.commit_create(Signer)
        # self.bulk_manager.commit_create(TerminationStarted)
        # if len(self.branch_bulk_manager.update_queues['business_register.Company']) > 0:
        #     self.branch_bulk_manager.commit_update(Company, ['name', 'short_name'])
        # self.branch_bulk_manager.commit_create(Company)

        # company_update_dict = {}
        # company_create_dict = {}

        # for company in self.branch_bulk_manager.update_queues['business_register.Company']:
        #     company_update_dict[company.company_code] = company
        # for company in self.branch_bulk_manager.create_queues['business_register.Company']:
        #     company_create_dict[company.company_code] = company
        #
        # self.bulk_manager.create_queues['business_register.Assignee'] = []
        # self.bulk_manager.create_queues['business_register.BancruptcyReadjustment'] = []
        # self.bulk_manager.create_queues['business_register.CompanyDetail'] = []
        # self.bulk_manager.create_queues['business_register.CompanyToKved'] = []
        # self.bulk_manager.create_queues['business_register.ExchangeDataCompany'] = []
        # self.bulk_manager.create_queues['business_register.CompanyToPredecessor'] = []
        # self.bulk_manager.create_queues['business_register.Signer'] = []
        # self.bulk_manager.create_queues['business_register.TerminationStarted'] = []
        # self.branch_bulk_manager.update_queues['business_register.Company'] = []
        # self.branch_bulk_manager.create_queues['business_register.Company'] = []
        #
        # for company_to_kved in self.branch_bulk_manager.create_queues['business_register.CompanyToKved']:
        #     if company_to_kved.company_code in company_update_dict:
        #         company_to_kved.company = company_update_dict[company_to_kved.company_code]
        #     else:
        #         company_to_kved.company = company_create_dict[company_to_kved.company_code]
        #
        # for exchange_data in self.branch_bulk_manager.create_queues['business_register.ExchangeDataCompany']:
        #     if exchange_data.company_code in company_update_dict:
        #         exchange_data.company = company_update_dict[exchange_data.company_code]
        #     else:
        #         exchange_data.company = company_create_dict[exchange_data.company_code]
        #
        # for signer in self.branch_bulk_manager.create_queues['business_register.Signer']:
        #     if signer.company_code in company_update_dict:
        #         signer.company = company_update_dict[signer.company_code]
        #     else:
        #         signer.company = company_create_dict[signer.company_code]
        #
        # self.branch_bulk_manager.commit_create(CompanyToKved)
        # self.branch_bulk_manager.commit_create(ExchangeDataCompany)
        # self.branch_bulk_manager.commit_create(Signer)
        # self.branch_bulk_manager.create_queues['business_register.CompanyToKved'] = []
        # self.branch_bulk_manager.create_queues['business_register.ExchangeDataCompany'] = []
        # self.branch_bulk_manager.create_queues['business_register.Signer'] = []

    def save_or_update_kved(self, kved, company):
        """Make sure ``company`` has a primary ``CompanyToKved`` link to ``kved``.

        A missing link is created with ``primary_kved=True``; an existing
        link that is not primary yet is promoted. An existing primary link
        is left untouched.
        """
        company_to_kved = (
            CompanyToKved.objects
            .filter(company=company, kved=kved)
            .first()
        )
        if not company_to_kved:
            CompanyToKved.objects.create(
                company=company, kved=kved, primary_kved=True)
            return
        if company_to_kved.primary_kved:
            # Already primary: nothing to write.
            return
        company_to_kved.primary_kved = True
        company_to_kved.save(update_fields=['primary_kved', 'updated_at'])

    def save_to_db(self, records):
        """Create or update a ``Company`` for every record of a parsed chunk.

        Records without a NAME or EDRPOU value are counted in
        ``self.report.invalid_data`` and skipped. Every other record is
        matched to a stored company by ``code`` (lower-cased name + EDRPOU);
        after that its KVED, founders and beneficiaries are handed to the
        matching ``save_or_update_*`` methods.

        :param records: iterable of elements exposing ``xpath()``
        """
        country = AddressConverter().save_or_get_country('Ukraine')
        for record in records:
            raw_name = record.xpath('NAME')[0].text
            # omitting records without company name or edrpou
            if not raw_name or not record.xpath('EDRPOU')[0].text:
                self.report.invalid_data += 1
                continue
            name = raw_name.lower()
            short_name = record.xpath('SHORT_NAME')[0].text
            if short_name:
                short_name = short_name.lower()
            edrpou = record.xpath('EDRPOU')[0].text
            code = name + edrpou
            address = record.xpath('ADDRESS')[0].text
            status = self.save_or_get_status(record.xpath('STAN')[0].text)
            boss = record.xpath('BOSS')[0].text
            if boss:
                boss = boss.lower()
            source = Company.UKRAINE_REGISTER
            # ToDo: resolve the problem of having records with the same company name and edrpou
            # ToDo: use source after storing source in the server DB
            company = (
                Company.objects
                .exclude(from_antac_only=True)
                .filter(code=code)
                .first()
            )
            if not company:
                company = Company.objects.create(
                    name=name,
                    short_name=short_name,
                    edrpou=edrpou,
                    address=address,
                    status=status,
                    boss=boss,
                    country=country,
                    code=code,
                    source=source,
                )
            else:
                # (field to update, attribute to compare, expected value, value to assign);
                # relations are compared by id.
                candidates = (
                    ('name', 'name', name, name),
                    ('short_name', 'short_name', short_name, short_name),
                    ('address', 'address', address, address),
                    ('status', 'status_id', status.id, status),
                    ('boss', 'boss', boss, boss),
                    ('country', 'country_id', country.id, country),
                    ('source', 'source', source, source),
                )
                update_fields = []
                for field, compared_attr, expected, new_value in candidates:
                    if getattr(company, compared_attr) != expected:
                        setattr(company, field, new_value)
                        update_fields.append(field)
                if update_fields:
                    update_fields.append('updated_at')
                    company.save(update_fields=update_fields)
            kved_data = record.xpath('KVED')[0].text
            if kved_data and ' ' in kved_data:
                self.save_or_update_kved(self.extract_kved(kved_data), company)
            founders = record.xpath('FOUNDERS')[0]
            if len(founders):
                self.save_or_update_founders(founders, company)
            beneficiaries = record.xpath('BENEFICIARIES')[0]
            if len(beneficiaries):
                self.save_or_update_beneficiaries(beneficiaries, company)
Пример #7
0
class FopConverter(BusinessConverter):

    def __init__(self):
        """Set the dataset address, local file settings and per-chunk buffers."""
        fop_register = Register.objects.get(
            source_register_id="1c7f3815-3259-45e0-bdf1-64dca07ddc10")
        self.API_ADDRESS_FOR_DATASET = fop_register.source_api_address
        self.LOCAL_FOLDER = settings.LOCAL_FOLDER
        self.LOCAL_FILE_NAME = settings.LOCAL_FILE_NAME_FOP
        self.CHUNK_SIZE = settings.CHUNK_SIZE_FOP
        self.RECORD_TAG = 'RECORD'
        self.bulk_manager = BulkCreateManager()
        # Buffers keyed by Fop code, filled for Fops that are not stored yet.
        self.new_fops_foptokveds = {}
        self.new_fops_exchange_data = {}
        super().__init__()

    def add_fop_kveds_to_dict(self, fop_kveds_from_record, code):
        """Buffer unsaved ``FopToKved`` objects for a Fop that is not stored yet.

        Activities without a CODE or NAME value are skipped. The built
        objects have no ``fop`` set; they are kept in
        ``self.new_fops_foptokveds[code]`` so the caller can attach the Fop
        later. Nothing is stored under ``code`` when no activity is usable.

        :param fop_kveds_from_record: iterable of activity elements
        :param code: Fop code used as the buffer key
        """
        pending_links = []
        for activity in fop_kveds_from_record:
            code_info = activity.xpath('CODE')
            if not code_info:
                continue
            kved_code = code_info[0].text
            if not kved_code:
                continue
            name_info = activity.xpath('NAME')
            if not name_info:
                continue
            kved_name = name_info[0].text
            if not kved_name:
                continue
            kved = self.get_kved_from_DB(kved_code, kved_name)
            # PRIMARY is guarded like CODE and NAME: a missing element means
            # "not primary" instead of raising IndexError for the whole chunk.
            primary_info = activity.xpath('PRIMARY')
            is_primary = bool(primary_info) and primary_info[0].text == "так"
            pending_links.append(FopToKved(kved=kved, primary_kved=is_primary))
        if pending_links:
            self.new_fops_foptokveds[code] = pending_links

    # syncing the stored FopToKved links of a fop with the kveds listed in the record
    def update_fop_kveds(self, fop_kveds_from_record, fop):
        """Synchronise the stored ``FopToKved`` links of ``fop`` with a record.

        For every usable activity (CODE and NAME present):

        * a stored link with the same kved code and name is kept, and its
          ``primary_kved`` flag is updated when it differs;
        * otherwise a new ``FopToKved`` is queued in ``self.bulk_manager``.

        Stored links that no activity matched are soft-deleted.

        :param fop_kveds_from_record: iterable of activity elements
        :param fop: the stored Fop the links belong to
        """
        # select_related avoids one extra query per stored link when
        # ``link.kved`` is read in the comparison below.
        unmatched_links = list(
            FopToKved.objects.filter(fop=fop).select_related('kved'))
        for activity in fop_kveds_from_record:
            code_info = activity.xpath('CODE')
            if not code_info:
                continue
            kved_code = code_info[0].text
            if not kved_code:
                continue
            name_info = activity.xpath('NAME')
            if not name_info:
                continue
            kved_name = name_info[0].text
            if not kved_name:
                continue
            kved = self.get_kved_from_DB(kved_code, kved_name)
            # PRIMARY is guarded like CODE and NAME: a missing element means
            # "not primary" instead of raising IndexError for the whole chunk.
            primary_info = activity.xpath('PRIMARY')
            is_primary = bool(primary_info) and primary_info[0].text == "так"
            matched_link = next(
                (link for link in unmatched_links
                 if link.kved.code == kved.code and link.kved.name == kved.name),
                None,
            )
            if matched_link is None:
                self.bulk_manager.add(
                    FopToKved(fop=fop, kved=kved, primary_kved=is_primary))
                continue
            if matched_link.primary_kved != is_primary:
                matched_link.primary_kved = is_primary
                matched_link.save(update_fields=['primary_kved', 'updated_at'])
            # A matched link is still current, so keep it out of the
            # soft-delete pass below.
            unmatched_links.remove(matched_link)
        for outdated_link in unmatched_links:
            outdated_link.soft_delete()

    def extract_exchange_data(self, answer):
        """Read the exchange-data fields of one ``answer`` element.

        Returns the tuple ``(authority, taxpayer_type, start_date,
        start_number, end_date, end_number)``. A field is ``None`` when its
        element is absent or has no text; ``start_number`` is the one
        exception and is returned as the raw text of START_NUM whenever that
        element exists.
        """
        def first_text(tag):
            # Text of the first element matching ``tag``; None if there is none.
            nodes = answer.xpath(tag)
            return nodes[0].text if nodes else None

        authority_name = first_text('AUTHORITY_NAME')
        authority = (
            self.save_or_get_authority(authority_name) if authority_name else None
        )
        taxpayer_name = first_text('TAX_PAYER_TYPE')
        taxpayer_type = (
            self.save_or_get_taxpayer_type(taxpayer_name) if taxpayer_name else None
        )
        start_date_text = first_text('START_DATE')
        start_date = (
            format_date_to_yymmdd(start_date_text) if start_date_text else None
        )
        start_number = first_text('START_NUM')
        end_date_text = first_text('END_DATE')
        end_date = format_date_to_yymmdd(end_date_text) if end_date_text else None
        end_number = first_text('END_NUM') or None
        return authority, taxpayer_type, start_date, start_number, end_date, end_number

    def add_fop_exchange_data_to_dict(self, exchange_data, code):
        """Buffer unsaved ``ExchangeDataFop`` objects for a Fop not stored yet.

        Answers whose extracted fields are all empty are skipped. The built
        objects have no ``fop`` set and are kept in
        ``self.new_fops_exchange_data[code]``; nothing is stored under
        ``code`` when no answer is usable.
        """
        pending_rows = []
        for answer in exchange_data:
            extracted = self.extract_exchange_data(answer)
            if not any(extracted):
                continue
            (authority, taxpayer_type, start_date,
             start_number, end_date, end_number) = extracted
            pending_rows.append(
                ExchangeDataFop(authority=authority, taxpayer_type=taxpayer_type,
                                start_date=start_date, start_number=start_number,
                                end_date=end_date, end_number=end_number))
        if pending_rows:
            self.new_fops_exchange_data[code] = pending_rows

    # queueing the exchange data from the record that is not stored for the fop yet
    def update_fop_exchange_data(self, exchange_data, fop):
        """Queue the exchange data of a record that ``fop`` does not have yet.

        An answer counts as already stored when a stored ``ExchangeDataFop``
        of ``fop`` has the same authority, taxpayer type, start number and
        end number (dates are not compared). Answers whose extracted fields
        are all empty are skipped; the remaining unmatched answers are added
        to ``self.bulk_manager``.
        """
        stored_rows = ExchangeDataFop.objects.filter(fop=fop)
        for answer in exchange_data:
            extracted = self.extract_exchange_data(answer)
            if not any(extracted):
                continue
            (authority, taxpayer_type, start_date,
             start_number, end_date, end_number) = extracted
            # ToDo: find way to check dates
            is_known = any(
                row.authority == authority
                and row.taxpayer_type == taxpayer_type
                and row.start_number == start_number
                and row.end_number == end_number
                for row in stored_rows
            )
            if is_known:
                continue
            self.bulk_manager.add(
                ExchangeDataFop(fop=fop, authority=authority,
                                taxpayer_type=taxpayer_type,
                                start_date=start_date, start_number=start_number,
                                end_date=end_date, end_number=end_number))

    def save_detailed_fop_to_db(self, records):
        """Create or update ``Fop`` rows with their KVED links and exchange data.

        A record is matched to a stored Fop by ``code`` (lower-cased full
        name + address, with ``'EMPTY'`` standing in for a missing address).

        * New Fops are queued in ``self.bulk_manager`` and committed after
          the loop; their KVED links and exchange data are buffered by code
          and attached to the Fop objects once those are committed.
        * Existing Fops are updated field by field, and their KVED links and
          exchange data are synchronised right away.

        Records without a NAME are counted in ``self.report.invalid_data``;
        records whose name is longer than 100 characters are only logged.

        :param records: iterable of elements exposing ``xpath()``
        """
        for record in records:
            fullname = record.xpath('NAME')[0].text
            if not fullname:
                logger.warning(f'ФОП без прізвища: {record}')
                self.report.invalid_data += 1
                continue
            if len(fullname) > 100:
                logger.warning(f'ФОП із задовгим прізвищем: {record}')
                continue
            fullname = fullname.lower()
            address = record.xpath('ADDRESS')[0].text or 'EMPTY'
            code = fullname + address
            status = self.save_or_get_status(record.xpath('STAN')[0].text)
            # The first word of REGISTRATION is parsed as the date, the rest
            # is kept as the registration info.
            registration_text = record.xpath('REGISTRATION')[0].text
            registration_date = None
            registration_info = None
            if registration_text:
                registration_date = format_date_to_yymmdd(
                    get_first_word(registration_text))
                registration_info = cut_first_word(registration_text)
            estate_manager = record.xpath('ESTATE_MANAGER')[0].text
            # TERMINATED_INFO is split the same way as REGISTRATION.
            termination_text = record.xpath('TERMINATED_INFO')[0].text
            termination_date = None
            terminated_info = None
            if termination_text:
                termination_date = format_date_to_yymmdd(
                    get_first_word(termination_text))
                terminated_info = cut_first_word(termination_text)
            termination_cancel_info = record.xpath('TERMINATION_CANCEL_INFO')[0].text
            contact_info = record.xpath('CONTACTS')[0].text
            vp_dates = record.xpath('VP_DATES')[0].text
            authority = self.save_or_get_authority(
                record.xpath('CURRENT_AUTHORITY')[0].text)
            fop_kveds = record.xpath('ACTIVITY_KINDS')[0]
            exchange_data = record.xpath('EXCHANGE_DATA')[0]
            fop = Fop.objects.filter(code=code).first()
            if not fop:
                fop = Fop(
                    fullname=fullname,
                    address=address,
                    status=status,
                    registration_date=registration_date,
                    registration_info=registration_info,
                    estate_manager=estate_manager,
                    termination_date=termination_date,
                    terminated_info=terminated_info,
                    termination_cancel_info=termination_cancel_info,
                    contact_info=contact_info,
                    vp_dates=vp_dates,
                    authority=authority,
                    code=code)
                self.bulk_manager.add(fop)
                if len(fop_kveds):
                    self.add_fop_kveds_to_dict(fop_kveds, code)
                if len(exchange_data):
                    self.add_fop_exchange_data_to_dict(exchange_data, code)
                continue

            # TODO: make a decision: our algorithm when Fop changes fullname or address?
            update_fields = []
            plain_fields = (
                ('status', status),
                ('registration_info', registration_info),
                ('estate_manager', estate_manager),
                ('terminated_info', terminated_info),
                ('termination_cancel_info', termination_cancel_info),
                ('contact_info', contact_info),
                ('vp_dates', vp_dates),
                ('authority', authority),
            )
            for field_name, new_value in plain_fields:
                if getattr(fop, field_name) != new_value:
                    setattr(fop, field_name, new_value)
                    update_fields.append(field_name)
            date_fields = (
                ('registration_date', registration_date),
                ('termination_date', termination_date),
            )
            for field_name, new_date in date_fields:
                stored_date = getattr(fop, field_name)
                # Dates are compared as text. An empty stored date is
                # compared as None, so that a date appearing in the record
                # for the first time is written too (previously a stored
                # NULL was never replaced).
                stored_text = str(stored_date) if stored_date else None
                if stored_text != new_date:
                    setattr(fop, field_name, new_date)
                    update_fields.append(field_name)
            if update_fields:
                update_fields.append('updated_at')
                fop.save(update_fields=update_fields)
            if len(fop_kveds):
                self.update_fop_kveds(fop_kveds, fop)
            if len(exchange_data):
                self.update_fop_exchange_data(exchange_data, fop)

        if self.bulk_manager.queues['business_register.Fop']:
            self.bulk_manager.commit(Fop)
        # The committed Fop objects are still in the queue: attach the
        # buffered KVED links and exchange data to them before clearing it.
        for fop in self.bulk_manager.queues['business_register.Fop']:
            for foptokved in self.new_fops_foptokveds.get(fop.code, ()):
                foptokved.fop = fop
                self.bulk_manager.add(foptokved)
        self.new_fops_foptokveds = {}
        for fop in self.bulk_manager.queues['business_register.Fop']:
            for exchangedata in self.new_fops_exchange_data.get(fop.code, ()):
                exchangedata.fop = fop
                self.bulk_manager.add(exchangedata)
        self.new_fops_exchange_data = {}
        self.bulk_manager.queues['business_register.Fop'] = []
        if self.bulk_manager.queues['business_register.FopToKved']:
            self.bulk_manager.commit(FopToKved)
        if self.bulk_manager.queues['business_register.ExchangeDataFop']:
            self.bulk_manager.commit(ExchangeDataFop)
        self.bulk_manager.queues['business_register.FopToKved'] = []
        self.bulk_manager.queues['business_register.ExchangeDataFop'] = []

    def save_or_update_kved(self, kved, fop):
        """Make sure ``fop`` has a primary ``FopToKved`` link to ``kved``.

        A missing link is created with ``primary_kved=True``; an existing
        link that is not primary yet is promoted. An existing primary link
        is left untouched.
        """
        fop_to_kved = FopToKved.objects.filter(fop=fop, kved=kved).first()
        if not fop_to_kved:
            FopToKved.objects.create(fop=fop, kved=kved, primary_kved=True)
            return
        if fop_to_kved.primary_kved:
            # Already primary: nothing to write.
            return
        fop_to_kved.primary_kved = True
        fop_to_kved.save(update_fields=['primary_kved', 'updated_at'])

    def save_to_db(self, records):
        """Create or update a ``Fop`` for every record of a parsed chunk.

        A record is matched to a stored Fop by ``code`` (lower-cased FIO +
        address, with ``'EMPTY'`` standing in for a missing address). For a
        stored Fop only the status is refreshed. Records without FIO are
        counted in ``self.report.invalid_data``; records whose FIO is longer
        than 100 characters are only logged. When the KVED text contains a
        space, the extracted kved is saved as the Fop's primary KVED.

        :param records: iterable of elements exposing ``xpath()``
        """
        for record in records:
            fullname = record.xpath('FIO')[0].text
            if not fullname:
                logger.warning(f'ФОП без прізвища: {record}')
                self.report.invalid_data += 1
                continue
            if len(fullname) > 100:
                logger.warning(f'ФОП із задовгим прізвищем: {record}')
                continue
            fullname = fullname.lower()
            address = record.xpath('ADDRESS')[0].text or 'EMPTY'
            code = fullname + address
            status = self.save_or_get_status(record.xpath('STAN')[0].text)
            fop = Fop.objects.filter(code=code).first()
            if fop:
                # TODO: make a decision: our algorithm when Fop changes fullname or address?
                if fop.status != status:
                    fop.status = status
                    fop.save(update_fields=['status', 'updated_at'])
            else:
                fop = Fop.objects.create(
                    fullname=fullname,
                    address=address,
                    status=status,
                    code=code,
                )
            kved_data = record.xpath('KVED')[0].text
            if kved_data and ' ' in kved_data:
                self.save_or_update_kved(self.extract_kved(kved_data), fop)

    # NOTE(review): as indented here this statement belongs to the class body,
    # so it runs once when the class is defined, not when a converter is used.
    print("For storing run FopConverter().process()")
Пример #8
0
class RuoConverter(Converter):
    """Converter that writes RUO records and their founders to the database.

    Besides the methods, the class body itself performs work when the class
    is defined: it loads every ``State`` and ``Kved`` row into lookup
    dictionaries, creates two ``BulkCreateManager`` instances and prints a
    usage hint. All of that state lives on the class, so it is shared by
    every instance until an instance rebinds it (``index`` is rebound per
    instance on first save).
    """
    CHUNK_SIZE = 300
    LOCAL_FILE_NAME = "uo.xml"
    DATASET_ID = "1c7f3815-3259-45e0-bdf1-64dca07ddc10"

    def rename_file(self, file):
        """Return the canonical local name for ``file``.

        The match is case-insensitive. A name containing ``FOP`` becomes
        ``'fop.xml'`` (this wins when both markers are present), a name
        containing ``UO`` becomes ``'uo.xml'``, and any other name is
        returned unchanged.
        """
        upper_name = file.upper()
        if 'FOP' in upper_name:
            return 'fop.xml'
        if 'UO' in upper_name:
            return 'uo.xml'
        return file

    # list of models for clearing DB
    tables = [
        Founders,
        Ruo,
    ]

    # format record's data
    record = {
        'RECORD': '',
        'NAME': '',
        'SHORT_NAME': '',
        'EDRPOU': '',
        'ADDRESS': '',
        'BOSS': '',
        'KVED': '',
        'STAN': '',
        'FOUNDING_DOCUMENT_NUM': '',
        'FOUNDERS': '',
        'FOUNDER': []
    }

    # dictionaries caching reference items that are already written to the db
    state_dict = {}  # maps state name -> whole State model object
    kved_dict = {}  # maps kved code -> whole Kved model object
    index = 0  # index for entries in _create_queues[model_key] list

    # filling state & kved dictionaries with existing db items
    # (these queries run once, when the class body is executed)
    for state in State.objects.all():
        state_dict[state.name] = state
    for kved in Kved.objects.all():
        kved_dict[kved.code] = kved

    # creating BulkCreateManager objects
    bulk_manager = BulkCreateManager(CHUNK_SIZE)
    bulk_submanager = BulkCreateManager(
        100000)  # chunk size 100000 so that it is never reached

    # writing entry to db
    def save_to_db(self, record):
        """Store one parsed record: resolve its state and KVED, then queue the RUO.

        :param record: mapping with the keys listed in ``RuoConverter.record``.
        """
        state = self.save_to_state_table(record)
        kved = self.get_kved_from_DB(record, 'NAME')
        self.save_to_ruo_table(record, state, kved)
        print('saved')

    # writing entry to state table
    def save_to_state_table(self, record):
        """Return the ``State`` for ``record['STAN']``, creating it if unknown.

        An empty ``STAN`` value falls back to ``State.EMPTY_FIELD``. Newly
        created states are saved immediately and cached in ``state_dict``.
        """
        state_name = record['STAN'] or State.EMPTY_FIELD
        if state_name not in self.state_dict:
            state = State(name=state_name)
            state.save()
            self.state_dict[state_name] = state
        return self.state_dict[state_name]

    # writing entry to ruo & founders table
    def save_to_ruo_table(self, record, state, kved):
        """Queue a new ``Ruo`` (and its founders) unless an identical row exists.

        :param record: mapping with ``NAME``, ``SHORT_NAME``, ``EDRPOU``,
            ``ADDRESS``, ``BOSS`` and ``FOUNDER`` keys.
        :param state: ``State`` instance to link.
        :param kved: ``Kved`` instance to link.
        :return: the already stored ``Ruo`` when one matches every field;
            ``None`` when a new object was queued.
        """
        # A single first() both tests for existence and fetches the row,
        # instead of an exists() query followed by a first() query.
        existing = Ruo.objects.filter(state=state.id,
                                      kved=kved.id,
                                      name=record['NAME'],
                                      short_name=record['SHORT_NAME'],
                                      edrpou=record['EDRPOU'],
                                      address=record['ADDRESS'],
                                      boss=record['BOSS']).first()
        if existing is not None:
            return existing
        ruo = Ruo(state=state,
                  kved=kved,
                  name=record['NAME'],
                  short_name=record['SHORT_NAME'],
                  edrpou=record['EDRPOU'],
                  address=record['ADDRESS'],
                  boss=record['BOSS'])
        # Original author's note on the scheme (translated): to make
        # bulk_create() usable for nested records, the regular ``id`` field
        # of each Founders object temporarily holds the index of its Ruo in
        # _create_queues['data_ocean.Ruo']. Once a chunk of Ruo objects has
        # been saved and has received database ids, each Founders object gets
        # its ``company`` assigned from that queue by the temporary id, and
        # ``id`` is then cleared so the database assigns the real one.
        self.bulk_manager.add(ruo)
        self.add_founders_to_queue(record, ruo)
        self.index = self.index + 1
        ruo_queue = self.bulk_manager._create_queues['data_ocean.Ruo']
        if len(ruo_queue) >= self.CHUNK_SIZE:
            for founders in self.bulk_submanager._create_queues[
                    'data_ocean.Founders']:
                # founders.id still holds the temporary queue index here.
                founders.company = ruo_queue[founders.id]
                founders.id = None
            self.bulk_submanager._commit(Founders)
            self.bulk_submanager._create_queues['data_ocean.Founders'] = []
            self.index = 0

    # filling _create_queues['data_ocean.Founders'] list
    def add_founders_to_queue(self, record, ruo):
        """Queue one ``Founders`` object per entry of ``record['FOUNDER']``.

        Each object carries the current ``self.index`` as a temporary ``id``.
        """
        for founder in record['FOUNDER']:
            founders = Founders(id=self.index, company=ruo, founder=founder)
            self.bulk_submanager.add(founders)

    # Usage hint, printed when the class body is executed.
    print(
        'Ruo already imported. For start rewriting RUO to the DB run > RuoConverter().process()\n',
        'For clear RUO tables run > RuoConverter().clear_db()')