import petl


def clean_file(input_path):
    '''Clean the input .csv file.'''
    # Load the CSV file as a petl table (lazy: nothing is read until the
    # table is consumed by a sink such as tocsv below).
    data = petl.fromcsv(input_path)
    # Clean each row with petl's rowmap; row_mapper and create_output_path
    # are helpers defined elsewhere in the source module. failonerror=True
    # makes a row-level exception abort the run instead of dropping the row.
    petl.rowmap(data, row_mapper, ['id', 'description'],
                failonerror=True).tocsv(create_output_path(input_path))
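This function assumes row_mapper and create_output_path are defined elsewhere in the module; neither appears in the excerpt. A minimal sketch of what they might look like (the names are kept, the behavior is entirely an assumption):

import os

def row_mapper(row):
    # Hypothetical cleaner: trim whitespace from both expected columns.
    return [row['id'].strip(), row['description'].strip()]

def create_output_path(input_path):
    # Hypothetical naming scheme: write a *_clean.csv next to the input.
    base, ext = os.path.splitext(input_path)
    return f'{base}_clean{ext}'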
Example #2
    def execute(self, context):
        try:
            nm_arq = 'RECEITAS.csv'
            with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
                data_from.write(
                    self.client_from.get_blob_client(
                        nm_arq).download_blob().readall())
            table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

            def rowmapper(row):
                # Strip whitespace from every field; columns that may hold
                # the literal string 'NULL' are mapped to an empty string.
                def null_to_empty(value):
                    value = value.strip()
                    return '' if value == 'NULL' else value

                return [
                    row[0].strip(), row[1].strip(), row[2].strip(),
                    row[3].strip(), row[4].strip(),
                    null_to_empty(row['nr_centro_custo']),
                    null_to_empty(row['descricao_centro_custo']),
                    row[7].strip(), row[8].strip(), row[9].strip(),
                    null_to_empty(row['tipo_cobranca_sub'])
                ]

            table1 = etl.rowmap(table,
                                rowmapper,
                                header=[
                                    'mes_competencia', 'fk_beneficiario',
                                    'fk_empresa', 'dt_geracao_titulo',
                                    'dt_pgto', 'nr_centro_custo',
                                    'descricao_centro_custo', 'tipo_cobranca',
                                    'vl_cobranca', 'vl_pago',
                                    'tipo_cobranca_sub'
                                ])

            table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
            etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')

            self.logger.info(f"Destino do arquivo {self.client_to}")
            self.logger.info(f"Container destino {self.container_to}")

        except azure.core.exceptions.ResourceNotFoundError:
            self.logger.error(f'{nm_arq} não encontrado no container de origem')

        upload_file = f'{_PROC_FILES}/t{nm_arq}'
        try:
            if os.path.isfile(upload_file):
                with open(upload_file, "rb") as data:
                    self.client_to.upload_blob(nm_arq, data, overwrite=True)
                    self.logger.info(f'{upload_file} carregado')
            else:
                self.logger.info(f'{upload_file} não foi encontrado')
        finally:
            self.logger.info('Processamento finalizado')
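Every operator in the examples that follow repeats the same three steps: download a blob into _PROC_FILES, clean it with etl.rowmap, and upload the result. The transform itself needs no Azure at all; here is a self-contained sketch against in-memory data (column names borrowed from the example above, the rows invented):

import petl as etl

rows = [['mes_competencia', 'fk_beneficiario', 'nr_centro_custo'],
        ['202101 ', ' 42 ', 'NULL']]

def rowmapper(row):
    def null_to_empty(value):
        value = value.strip()
        return '' if value == 'NULL' else value
    return [row[0].strip(), row[1].strip(),
            null_to_empty(row['nr_centro_custo'])]

cleaned = etl.rowmap(rows, rowmapper,
                     header=['mes_competencia', 'fk_beneficiario',
                             'nr_centro_custo'])
print(etl.look(cleaned))  # the 'NULL' cell comes out empty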
Example #3
    def execute(self, context):
        try:
            nm_arq = 'SERVICOS.csv'
            with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
                data_from.write(
                    self.client_from.get_blob_client(
                        nm_arq).download_blob().readall())
            table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

            def rowmapper(row):
                # Strip whitespace from every field; columns that may hold
                # the literal string 'NULL' are mapped to an empty string.
                def null_to_empty(value):
                    value = value.strip()
                    return '' if value == 'NULL' else value

                return [
                    row[0].strip(), row[1].strip(), row[2].strip(),
                    row[3].strip(), row[4].strip(),
                    null_to_empty(row['subgrupo']), row[6].strip(),
                    null_to_empty(row['dt_alteracao']),
                    null_to_empty(row['ind_cirurgico'])
                ]

            table1 = etl.rowmap(table,
                                rowmapper,
                                header=[
                                    'pk_servico', 'tipo', 'descricao',
                                    'capitulo', 'grupo', 'subgrupo',
                                    'dt_inclusao', 'dt_alteracao',
                                    'ind_cirurgico'
                                ])

            table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
            etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')

            self.logger.info(f"Destino do arquivo {self.client_to}")
            self.logger.info(f"Container destino {self.container_to}")

        except azure.core.exceptions.ResourceNotFoundError:
            self.logger.error(f'{nm_arq} não encontrado no container de origem')

        upload_file = f'{_PROC_FILES}/t{nm_arq}'
        try:
            if os.path.isfile(upload_file):
                with open(upload_file, "rb") as data:
                    self.client_to.upload_blob(nm_arq, data, overwrite=True)
                    self.logger.info(f'{upload_file} carregado')
            else:
                self.logger.info(f'{upload_file} não foi encontrado')
        finally:
            self.logger.info('Processamento finalizado')
Example #4
    def execute(self, context):
        try:
            nm_arq = 'VIDAS.csv'
            with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
                data_from.write(
                    self.client_from.get_blob_client(
                        nm_arq).download_blob().readall())
            table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

            def rowmapper(row):
                # Map the status text to a single-letter code; dict.get
                # returns None for any unexpected value.
                transatv = {'NÃO ATIVO': 'I', 'ATIVO': 'A'}
                return [
                    row[0].strip(), row[1].strip(),
                    transatv.get(row['ativo'].strip())
                ]

            table1 = etl.rowmap(
                table,
                rowmapper,
                header=['mes_competencia', 'fk_beneficiario', 'ativo'])

            table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
            etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')

            self.logger.info(f"Destino do arquivo {self.client_to}")
            self.logger.info(f"Container destino {self.container_to}")

        except azure.core.exceptions.ResourceNotFoundError:
            self.logger.error(f'{nm_arq} não encontrado no container de origem')

        upload_file = f'{_PROC_FILES}/t{nm_arq}'
        try:
            if os.path.isfile(upload_file):
                with open(upload_file, "rb") as data:
                    self.client_to.upload_blob(nm_arq, data, overwrite=True)
                    self.logger.info(f'{upload_file} carregado')
            else:
                self.logger.info(f'{upload_file} não foi encontrado')
        finally:
            self.logger.info('Processamento finalizado')
Example #5
    def execute(self, context):
        try:
            nm_arq = 'EMPRESA.csv'
            with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
                data_from.write(
                    self.client_from.get_blob_client(
                        nm_arq).download_blob().readall())
            table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

            def rowmapper(row):
                # Strip whitespace from every field.
                return [
                    row[0].strip(), row[1].strip(), row[2].strip(),
                    row[3].strip()
                ]

            table1 = etl.rowmap(
                table,
                rowmapper,
                header=['pk_empresa', 'nome', 'razao_social', 'cnpj'])

            table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
            etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')

            self.logger.info(f"Destino do arquivo {self.client_to}")
            self.logger.info(f"Container destino {self.container_to}")

        except azure.core.exceptions.ResourceNotFoundError:
            self.logger.error(f'{nm_arq} não encontrado no container de origem')

        upload_file = f'{_PROC_FILES}/t{nm_arq}'
        try:
            if os.path.isfile(upload_file):
                with open(upload_file, "rb") as data:
                    self.client_to.upload_blob(nm_arq, data, overwrite=True)
                    self.logger.info(f'{upload_file} carregado')
            else:
                self.logger.info(f'{upload_file} não foi encontrado')
        finally:
            self.logger.info('Processamento finalizado')
Example #6
File: examples.py Project: datamade/petl
table1 = [['id', 'sex', 'age', 'height', 'weight'],
          [1, 'male', 16, 1.45, 62.0],
          [2, 'female', 19, 1.34, 55.4],
          [3, 'female', 17, 1.78, 74.4],
          [4, 'male', 21, 1.33, 45.2],
          [5, '-', 25, 1.65, 51.9]]

from petl import rowmap, look
look(table1)
def rowmapper(row):
    transmf = {'male': 'M', 'female': 'F'}
    return [row[0],
            transmf[row[1]] if row[1] in transmf else row[1],
            row[2] * 12,
            row[4] / row[3] ** 2]

table2 = rowmap(table1, rowmapper, fields=['subject_id', 'gender', 'age_months', 'bmi'])  
look(table2)    
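Note that rowmap is forgiving by default: if the rowmapper raises for some row, petl drops that row from the output, and only failonerror=True (as used in the clean_file example at the top of this page) turns such errors into exceptions. A small demonstration with a hypothetical malformed row appended:

broken = table1 + [[6, 'male', 30]]  # missing height and weight
# row[4] raises IndexError inside rowmapper, so the row is dropped:
look(rowmap(broken, rowmapper, fields=['subject_id', 'gender', 'age_months', 'bmi']))
# Passing failonerror=True as well would re-raise the IndexError instead.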


# recordmap

table1 = [['id', 'sex', 'age', 'height', 'weight'],
          [1, 'male', 16, 1.45, 62.0],
          [2, 'female', 19, 1.34, 55.4],
          [3, 'female', 17, 1.78, 74.4],
          [4, 'male', 21, 1.33, 45.2],
          [5, '-', 25, 1.65, 51.9]]

from petl import recordmap, look
look(table1)
def recmapper(rec):
    transmf = {'male': 'M', 'female': 'F'}
    return [rec['id'],
            transmf[rec['sex']] if rec['sex'] in transmf else rec['sex'],
            rec['age'] * 12,
            rec['weight'] / rec['height'] ** 2]

table2 = recordmap(table1, recmapper, fields=['subject_id', 'gender', 'age_months', 'bmi'])
look(table2)
Example #8
    def execute(self, context):
        try:
            nm_arq = 'BENEFICIARIO.csv'
            with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
                data_from.write(
                    self.client_from.get_blob_client(
                        nm_arq).download_blob().readall())
            table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

            def rowmapper(row):
                # 'MASCULINO'/'FEMININO' become 'M'/'F'; anything else None.
                transmf = {'MASCULINO': 'M', 'FEMININO': 'F'}

                # Strip whitespace; map the literal string 'NULL' to ''.
                def null_to_empty(value):
                    value = value.strip()
                    return '' if value == 'NULL' else value

                return [
                    row[0].strip(), row[1].strip(), row[2].strip(),
                    row[3].strip(), row['nome'].strip(), row[5].strip(),
                    row[6].strip(), row[7].strip(), row[8].strip(),
                    null_to_empty(row['copart_percentual']),
                    null_to_empty(row['limite_copart']),
                    row[11].strip(), row[12].strip(), row[13].strip(),
                    row[14].strip(),
                    null_to_empty(row['dt_exclusao']),
                    row[16],  # nome_mae passes through unstripped
                    transmf.get(row['sexo'].strip()),
                    row[18].strip(), row[19].strip(), row[20].strip(),
                    row[21].strip(), row[22].strip(), row[23].strip(),
                    row[24].strip(), row[25].strip(), row[26].strip(),
                    row[27].strip(), row[28].strip(), row[29].strip()
                ]

            table1 = etl.rowmap(
                table,
                rowmapper,
                header=[
                    'pk_beneficiario', 'nr_beneficiario',
                    'nr_beneficiario_tit', 'fk_empresa', 'nome', 'cpf',
                    'dt_nascimento', 'cod_plano', 'descricao_plano',
                    'copart_percentual', 'limite_copart', 'tipo_acomodacao',
                    'abrangencia_plano', 'grau_dependencia', 'dt_inclusao',
                    'dt_exclusao', 'nome_mae', 'sexo', 'tipo_contrato',
                    'endereco', 'bairro', 'cidade', 'uf', 'cep',
                    'tipo_cliente', 'nr_cartaonacionalsaude',
                    'plano_regulamentado', 'descricao_entidade', 'tipo_pessoa',
                    'acao_judicial'
                ])

            table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
            etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')

            self.logger.info(f"Destino do arquivo {self.client_to}")
            self.logger.info(f"Container destino {self.container_to}")

        except azure.core.exceptions.ResourceNotFoundError:
            self.logger.error(f'{nm_arq} não encontrado no container de origem')

        upload_file = f'{_PROC_FILES}/t{nm_arq}'

        try:
            if os.path.isfile(upload_file):
                with open(upload_file, "rb") as data:
                    self.client_to.upload_blob(nm_arq, data, overwrite=True)
                    self.logger.info(f'{upload_file} carregado')
            else:
                self.logger.info(f'{upload_file} não foi encontrado')
        finally:
            self.logger.info('Processamento finalizado')
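With 30 output columns, the header list and the rowmapper above can easily drift out of sync. One way to keep them together (a sketch, not part of the original operator; the pairs shown are illustrative and assume the input file uses the same field names) is to drive both from a single list of (field, transform) tuples:

def strip(value):
    return value.strip()

CLEANERS = [
    ('pk_beneficiario', strip),
    ('sexo', lambda v: {'MASCULINO': 'M', 'FEMININO': 'F'}.get(v.strip())),
    # ... one (field, transform) pair per column ...
]

def rowmapper(row):
    return [transform(row[field]) for field, transform in CLEANERS]

header = [field for field, _ in CLEANERS]
table1 = etl.rowmap(table, rowmapper, header=header)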
Example #9
    def execute(self, context):
        try:
            nm_arq = 'PRESTADORES.csv'
            with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
                data_from.write(
                    self.client_from.get_blob_client(
                        nm_arq).download_blob().readall())
            table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

            def rowmapper(row):
                # Strip whitespace from every field; columns that may hold
                # the literal string 'NULL' are mapped to an empty string.
                def null_to_empty(value):
                    value = value.strip()
                    return '' if value == 'NULL' else value

                return [
                    row[0].strip(), row[1].strip(),
                    null_to_empty(row['nome_completo']),
                    row[3].strip(), row[4].strip(),
                    null_to_empty(row['dat_nascimento']),
                    row[6].strip(), row[7].strip(), row[8].strip(),
                    null_to_empty(row['cnae']), row[10].strip(),
                    null_to_empty(row['grau']),
                    row[12].strip(), row[13].strip(), row[14].strip(),
                    row[15].strip(), row[16].strip(), row[17].strip(),
                    row[18].strip(),
                    null_to_empty(row['latitude']),
                    null_to_empty(row['longitude']),
                    null_to_empty(row['fone1']),
                    null_to_empty(row['fone2']),
                    null_to_empty(row['fone3'])
                ]

            table1 = etl.rowmap(
                table,
                rowmapper,
                header=[
                    'pk_medico_prestador', 'nome_fantasia', 'nome_completo',
                    'cod_crm', 'f_crm', 'dat_nascimento', 'num_cpf_cnpj',
                    'tipo_prestador', 'grupo_prestador', 'cnae',
                    'especialidade', 'grau', 'endereço', 'numero',
                    'Complemento', 'bairro', 'cidade_prestador',
                    'uf_prestador', 'cep', 'latitude', 'longitude', 'fone1',
                    'fone2', 'fone3'
                ])

            table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
            etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')

            self.logger.info(f"Destino do arquivo {self.client_to}")
            self.logger.info(f"Container destino {self.container_to}")

        except azure.core.exceptions.ResourceNotFoundError:
            self.logger.error(f'{nm_arq} não encontrado no container de origem')

        upload_file = f'{_PROC_FILES}/t{nm_arq}'
        try:
            if os.path.isfile(upload_file):
                with open(upload_file, "rb") as data:
                    self.client_to.upload_blob(nm_arq, data, overwrite=True)
                    self.logger.info(f'{upload_file} carregado')
            else:
                self.logger.info(f'{upload_file} não foi encontrado')
        finally:
            self.logger.info('Processamento finalizado')
Example #10
    def execute(self, context):
        try:
            nm_arq = 'COSTOS.csv'
            with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
                data_from.write(
                    self.client_from.get_blob_client(
                        nm_arq).download_blob().readall())
            table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

            def rowmapper(row):
                # Strip whitespace from every field; columns that may hold
                # the literal string 'NULL' are mapped to an empty string.
                def null_to_empty(value):
                    value = value.strip()
                    return '' if value == 'NULL' else value

                return [
                    row[0].strip(), row[1].strip(), row[2].strip(),
                    row[3].strip(),
                    null_to_empty(row['nr_guia_principal']),
                    row[5].strip(),
                    null_to_empty(row['fk_cid']),
                    null_to_empty(row['dt_ini_internacao']),
                    null_to_empty(row['dt_fim_internacao']),
                    null_to_empty(row['fk_medicosolicitante']),
                    null_to_empty(row['cod_executante']),
                    row[11].strip(), row[12].strip(), row[13].strip(),
                    row[14].strip(), row[15].strip(), row[16].strip(),
                    null_to_empty(row['qtd_unica']),
                    row[18].strip(), row[19].strip(), row[20].strip(),
                    row[21].strip(), row[22].strip(), row[23].strip(),
                    row[24].strip(),
                    null_to_empty(row['tipo_internacao']),
                    null_to_empty(row['tipo_acomodacao']),
                    null_to_empty(row['ds_indicacao_clinica']),
                    row[28].strip(), row[29].strip(), row[30].strip(),
                    null_to_empty(row['partic_medico'])
                ]

            table1 = etl.rowmap(
                table,
                rowmapper,
                header=[
                    'cod_custo', 'fk_beneficiario', 'mes_competencia',
                    'nr_guia', 'nr_guia_principal', 'dt_realizacao', 'fk_cid',
                    'dt_ini_internacao', 'dt_fim_internacao',
                    'fk_medicosolicitante', 'cod_executante', 'fk_local_atend',
                    'fk_capa_unico', 'fk_destino_pgto', 'fk_servico',
                    'cod_servico_tipo', 'qtd', 'qtd_unica', 'vl_unitario',
                    'vl_total', 'vl_coparticipacao', 'ind_internacao',
                    'tipo_registro', 'tipo_atendimento', 'tipo_guia',
                    'tipo_internacao', 'tipo_acomodacao',
                    'ds_indicacao_clinica', 'vl_hm', 'vl_co', 'vl_filme',
                    'partic_medico'
                ])

            table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
            etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')

            self.logger.info(f"Destino do arquivo {self.client_to}")
            self.logger.info(f"Container destino {self.container_to}")

        except azure.core.exceptions.ResourceNotFoundError:
            self.logger.error(f'{nm_arq} não encontrado no container de origem')

        upload_file = f'{_PROC_FILES}/t{nm_arq}'
        try:
            if os.path.isfile(upload_file):
                with open(upload_file, "rb") as data:
                    self.client_to.upload_blob(nm_arq, data, overwrite=True)
                    self.logger.info(f'{upload_file} carregado')
            else:
                self.logger.info(f'{upload_file} não foi encontrado')
        finally:
            self.logger.info('Processamento finalizado')
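Finally, the strip-everything-and-blank-out-'NULL' cleanup that recurs in these operators does not strictly need rowmap. petl's convert utilities can express it declaratively: convertall with a method name calls that method on every value, and convert with a dict maps specific values. A sketch (file name reused from Example #2, the field tuple illustrative):

import petl as etl

table = etl.fromcsv('RECEITAS.csv', delimiter='|')
cleaned = etl.convert(
    etl.convertall(table, 'strip'),  # call .strip() on every value
    ('nr_centro_custo', 'descricao_centro_custo', 'tipo_cobranca_sub'),
    {'NULL': ''},                    # dictionary-based value mapping
)
# Unlike rowmap, this keeps the source header; renaming columns would still
# need etl.setheader or etl.rename.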