import petl


def clean_file(input_path):
    '''Clean the input .csv file.'''
    # load the csv file
    data = petl.fromcsv(input_path)
    # use petl's rowmap to clean each row, failing loudly on bad rows
    petl.rowmap(data, row_mapper,
                ['id', 'description'],
                failonerror=True).tocsv(create_output_path(input_path))
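The snippet above leans on two helpers it does not define, row_mapper and create_output_path. A minimal sketch of what they might look like, assuming the cleaning is plain whitespace-trimming and the output sits next to the input with a clean_ prefix (both are assumptions, not part of the original):

import os


def row_mapper(row):
    # hypothetical cleaner: trim whitespace from both expected columns
    return [row['id'].strip(), row['description'].strip()]


def create_output_path(input_path):
    # hypothetical naming convention for the cleaned output file
    directory, name = os.path.split(input_path)
    return os.path.join(directory, f'clean_{name}')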
def execute(self, context):
    try:
        nm_arq = 'RECEITAS.csv'
        # stage the source blob on the local filesystem
        with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
            data_from.write(
                self.client_from.get_blob_client(
                    nm_arq).download_blob().readall())
        table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

        def rowmapper(row):
            strnull = {'NULL': ''}

            def clean(field):
                # strip whitespace and map the literal 'NULL' to ''
                value = row[field].strip()
                return strnull.get(value, value)

            return [
                row[0].strip(),
                row[1].strip(),
                row[2].strip(),
                row[3].strip(),
                row[4].strip(),
                clean('nr_centro_custo'),
                clean('descricao_centro_custo'),
                row[7].strip(),
                row[8].strip(),
                row[9].strip(),
                clean('tipo_cobranca_sub')
            ]

        table1 = etl.rowmap(table, rowmapper, header=[
            'mes_competencia', 'fk_beneficiario', 'fk_empresa',
            'dt_geracao_titulo', 'dt_pgto', 'nr_centro_custo',
            'descricao_centro_custo', 'tipo_cobranca', 'vl_cobranca',
            'vl_pago', 'tipo_cobranca_sub'
        ])
        table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
        etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')
        self.logger.info(f"File destination {self.client_to}")
        self.logger.info(f"Destination container {self.container_to}")
    except azure.core.exceptions.ResourceNotFoundError:
        print('Entered the exception handler :)')

    upload_file = f'{_PROC_FILES}/t{nm_arq}'
    try:
        if os.path.isfile(upload_file):
            with open(upload_file, "rb") as data:
                self.client_to.upload_blob(nm_arq, data, overwrite=True)
            self.logger.info(f'{upload_file} uploaded')
        else:
            self.logger.info(f't{nm_arq} was not found in the container')
    finally:
        self.logger.info('All uploaded')
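These execute(self, context) methods all assume the same surrounding scaffolding: module-level constants for a local staging directory (_PROC_FILES) and a source id (_SOURCE), plus Azure Blob Storage container clients and a logger on self. A minimal sketch of that host, assuming azure-storage-blob v12; the class name and the constant values are hypothetical, not part of the original code:

import logging
import os

import azure.core.exceptions
import petl as etl
from azure.storage.blob import ContainerClient

_PROC_FILES = '/tmp/proc_files'  # assumed staging directory
_SOURCE = 'OPERADORA_X'          # assumed source id, written as fk_operadora


class TransformOperator:  # hypothetical host class for the execute() methods
    def __init__(self, conn_str, container_from, container_to):
        self.client_from = ContainerClient.from_connection_string(
            conn_str, container_from)
        self.client_to = ContainerClient.from_connection_string(
            conn_str, container_to)
        self.container_to = container_to
        self.logger = logging.getLogger(__name__)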
def execute(self, context):
    try:
        nm_arq = 'SERVICOS.csv'
        with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
            data_from.write(
                self.client_from.get_blob_client(
                    nm_arq).download_blob().readall())
        table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

        def rowmapper(row):
            strnull = {'NULL': ''}

            def clean(field):
                # strip whitespace and map the literal 'NULL' to ''
                value = row[field].strip()
                return strnull.get(value, value)

            return [
                row[0].strip(),
                row[1].strip(),
                row[2].strip(),
                row[3].strip(),
                row[4].strip(),
                clean('subgrupo'),
                row[6].strip(),
                clean('dt_alteracao'),
                clean('ind_cirurgico')
            ]

        table1 = etl.rowmap(table, rowmapper, header=[
            'pk_servico', 'tipo', 'descricao', 'capitulo', 'grupo',
            'subgrupo', 'dt_inclusao', 'dt_alteracao', 'ind_cirurgico'
        ])
        table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
        etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')
        self.logger.info(f"File destination {self.client_to}")
        self.logger.info(f"Destination container {self.container_to}")
    except azure.core.exceptions.ResourceNotFoundError:
        print('Entered the exception handler :)')

    upload_file = f'{_PROC_FILES}/t{nm_arq}'
    try:
        if os.path.isfile(upload_file):
            with open(upload_file, "rb") as data:
                self.client_to.upload_blob(nm_arq, data, overwrite=True)
            self.logger.info(f'{upload_file} uploaded')
        else:
            self.logger.info(f't{nm_arq} was not found in the container')
    finally:
        self.logger.info('All uploaded')
def execute(self, context):
    try:
        nm_arq = 'VIDAS.csv'
        with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
            data_from.write(
                self.client_from.get_blob_client(
                    nm_arq).download_blob().readall())
        table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

        def rowmapper(row):
            # map status text to a single letter; unknown values become None
            transatv = {'NÃO ATIVO': 'I', 'ATIVO': 'A'}
            return [
                row[0].strip(),
                row[1].strip(),
                transatv.get(row['ativo'].strip())
            ]

        table1 = etl.rowmap(
            table, rowmapper,
            header=['mes_competencia', 'fk_beneficiario', 'ativo'])
        table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
        etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')
        self.logger.info(f"File destination {self.client_to}")
        self.logger.info(f"Destination container {self.container_to}")
    except azure.core.exceptions.ResourceNotFoundError:
        print('Entered the exception handler :)')

    upload_file = f'{_PROC_FILES}/t{nm_arq}'
    try:
        if os.path.isfile(upload_file):
            with open(upload_file, "rb") as data:
                self.client_to.upload_blob(nm_arq, data, overwrite=True)
            self.logger.info(f'{upload_file} uploaded')
        else:
            self.logger.info(f't{nm_arq} was not found in the container')
    finally:
        self.logger.info('All uploaded')
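Note that this mapper, like the sexo handling in the BENEFICIARIO snippet below, turns any unrecognized status into None rather than passing the original value through (compare the petl documentation example further down, where unmapped values survive), so unexpected inputs end up as empty fields in the output CSV.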
def execute(self, context):
    try:
        nm_arq = 'EMPRESA.csv'
        with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
            data_from.write(
                self.client_from.get_blob_client(
                    nm_arq).download_blob().readall())
        table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

        def rowmapper(row):
            # no 'NULL' handling needed here, just trim every field
            return [
                row[0].strip(),
                row[1].strip(),
                row[2].strip(),
                row[3].strip()
            ]

        table1 = etl.rowmap(
            table, rowmapper,
            header=['pk_empresa', 'nome', 'razao_social', 'cnpj'])
        table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
        etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')
        self.logger.info(f"File destination {self.client_to}")
        self.logger.info(f"Destination container {self.container_to}")
    except azure.core.exceptions.ResourceNotFoundError:
        print('Entered the exception handler :)')

    upload_file = f'{_PROC_FILES}/t{nm_arq}'
    try:
        if os.path.isfile(upload_file):
            with open(upload_file, "rb") as data:
                self.client_to.upload_blob(nm_arq, data, overwrite=True)
            self.logger.info(f'{upload_file} uploaded')
        else:
            self.logger.info(f't{nm_arq} was not found in the container')
    finally:
        self.logger.info('All uploaded')
table1 = [['id', 'sex', 'age', 'height', 'weight'],
          [1, 'male', 16, 1.45, 62.0],
          [2, 'female', 19, 1.34, 55.4],
          [3, 'female', 17, 1.78, 74.4],
          [4, 'male', 21, 1.33, 45.2],
          [5, '-', 25, 1.65, 51.9]]

from petl import rowmap, look
look(table1)

def rowmapper(row):
    transmf = {'male': 'M', 'female': 'F'}
    return [row[0],
            transmf[row[1]] if row[1] in transmf else row[1],
            row[2] * 12,            # age in years -> months
            row[4] / row[3] ** 2]   # weight / height**2 = BMI

table2 = rowmap(table1, rowmapper,
                fields=['subject_id', 'gender', 'age_months', 'bmi'])
look(table2)

# recordmap
table1 = [['id', 'sex', 'age', 'height', 'weight'],
          [1, 'male', 16, 1.45, 62.0],
          [2, 'female', 19, 1.34, 55.4],
          [3, 'female', 17, 1.78, 74.4],
          [4, 'male', 21, 1.33, 45.2],
          [5, '-', 25, 1.65, 51.9]]

from petl import recordmap, look
look(table1)

def recmapper(rec):
    transmf = {'male': 'M', 'female': 'F'}
    return [rec['id'],
            transmf[rec['sex']] if rec['sex'] in transmf else rec['sex'],
            rec['age'] * 12,
            rec['weight'] / rec['height'] ** 2]
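Two details worth noting about this documentation excerpt relative to the production snippets above: it appears to come from the pre-1.0 petl API (hence fields= and recordmap; current releases take header=, and rowmap covers recordmap's use case), and in current releases the rows handed to a rowmapper are hybrid Record objects, which is why the earlier snippets can freely mix positional access like row[0] with named access like row['nome'].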
def execute(self, context):
    try:
        nm_arq = 'BENEFICIARIO.csv'
        with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
            data_from.write(
                self.client_from.get_blob_client(
                    nm_arq).download_blob().readall())
        table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

        def rowmapper(row):
            transmf = {'MASCULINO': 'M', 'FEMININO': 'F'}
            strnull = {'NULL': ''}

            def clean(field):
                # strip whitespace and map the literal 'NULL' to ''
                value = row[field].strip()
                return strnull.get(value, value)

            return [
                row[0].strip(),
                row[1].strip(),
                row[2].strip(),
                row[3].strip(),
                row['nome'].strip(),
                row[5].strip(),
                row[6].strip(),
                row[7].strip(),
                row[8].strip(),
                clean('copart_percentual'),
                clean('limite_copart'),
                row[11].strip(),
                row[12].strip(),
                row[13].strip(),
                row[14].strip(),
                clean('dt_exclusao'),
                row[16],
                # unknown sexo values become None
                transmf.get(row['sexo'].strip()),
                row[18].strip(),
                row[19].strip(),
                row[20].strip(),
                row[21].strip(),
                row[22].strip(),
                row[23].strip(),
                row[24].strip(),
                row[25].strip(),
                row[26].strip(),
                row[27].strip(),
                row[28].strip(),
                row[29].strip()
            ]

        table1 = etl.rowmap(table, rowmapper, header=[
            'pk_beneficiario', 'nr_beneficiario', 'nr_beneficiario_tit',
            'fk_empresa', 'nome', 'cpf', 'dt_nascimento', 'cod_plano',
            'descricao_plano', 'copart_percentual', 'limite_copart',
            'tipo_acomodacao', 'abrangencia_plano', 'grau_dependencia',
            'dt_inclusao', 'dt_exclusao', 'nome_mae', 'sexo',
            'tipo_contrato', 'endereco', 'bairro', 'cidade', 'uf', 'cep',
            'tipo_cliente', 'nr_cartaonacionalsaude', 'plano_regulamentado',
            'descricao_entidade', 'tipo_pessoa', 'acao_judicial'
        ])
        table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
        etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')
        self.logger.info(f"File destination {self.client_to}")
        self.logger.info(f"Destination container {self.container_to}")
    except azure.core.exceptions.ResourceNotFoundError:
        print('Entered the exception handler :)')

    upload_file = f'{_PROC_FILES}/t{nm_arq}'
    try:
        if os.path.isfile(upload_file):
            with open(upload_file, "rb") as data:
                self.client_to.upload_blob(nm_arq, data, overwrite=True)
            self.logger.info(f'{upload_file} uploaded')
        else:
            self.logger.info(f't{nm_arq} was not found in the container')
    finally:
        self.logger.info('All uploaded')
def execute(self, context):
    try:
        nm_arq = 'PRESTADORES.csv'
        with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
            data_from.write(
                self.client_from.get_blob_client(
                    nm_arq).download_blob().readall())
        table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

        def rowmapper(row):
            strnull = {'NULL': ''}

            def clean(field):
                # strip whitespace and map the literal 'NULL' to ''
                value = row[field].strip()
                return strnull.get(value, value)

            return [
                row[0].strip(),
                row[1].strip(),
                clean('nome_completo'),
                row[3].strip(),
                row[4].strip(),
                clean('dat_nascimento'),
                row[6].strip(),
                row[7].strip(),
                row[8].strip(),
                clean('cnae'),
                row[10].strip(),
                clean('grau'),
                row[12].strip(),
                row[13].strip(),
                row[14].strip(),
                row[15].strip(),
                row[16].strip(),
                row[17].strip(),
                row[18].strip(),
                clean('latitude'),
                clean('longitude'),
                clean('fone1'),
                clean('fone2'),
                clean('fone3')
            ]

        table1 = etl.rowmap(table, rowmapper, header=[
            'pk_medico_prestador', 'nome_fantasia', 'nome_completo',
            'cod_crm', 'f_crm', 'dat_nascimento', 'num_cpf_cnpj',
            'tipo_prestador', 'grupo_prestador', 'cnae', 'especialidade',
            'grau', 'endereço', 'numero', 'Complemento', 'bairro',
            'cidade_prestador', 'uf_prestador', 'cep', 'latitude',
            'longitude', 'fone1', 'fone2', 'fone3'
        ])
        table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
        etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')
        self.logger.info(f"File destination {self.client_to}")
        self.logger.info(f"Destination container {self.container_to}")
    except azure.core.exceptions.ResourceNotFoundError:
        print('Entered the exception handler :)')

    upload_file = f'{_PROC_FILES}/t{nm_arq}'
    try:
        if os.path.isfile(upload_file):
            with open(upload_file, "rb") as data:
                self.client_to.upload_blob(nm_arq, data, overwrite=True)
            self.logger.info(f'{upload_file} uploaded')
        else:
            self.logger.info(f't{nm_arq} was not found in the container')
    finally:
        self.logger.info('All uploaded')
def execute(self, context):
    try:
        nm_arq = 'COSTOS.csv'
        with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
            data_from.write(
                self.client_from.get_blob_client(
                    nm_arq).download_blob().readall())
        table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

        def rowmapper(row):
            strnull = {'NULL': ''}

            def clean(field):
                # strip whitespace and map the literal 'NULL' to ''
                value = row[field].strip()
                return strnull.get(value, value)

            return [
                row[0].strip(),
                row[1].strip(),
                row[2].strip(),
                row[3].strip(),
                clean('nr_guia_principal'),
                row[5].strip(),
                clean('fk_cid'),
                clean('dt_ini_internacao'),
                clean('dt_fim_internacao'),
                clean('fk_medicosolicitante'),
                clean('cod_executante'),
                row[11].strip(),
                row[12].strip(),
                row[13].strip(),
                row[14].strip(),
                row[15].strip(),
                row[16].strip(),
                clean('qtd_unica'),
                row[18].strip(),
                row[19].strip(),
                row[20].strip(),
                row[21].strip(),
                row[22].strip(),
                row[23].strip(),
                row[24].strip(),
                clean('tipo_internacao'),
                clean('tipo_acomodacao'),
                clean('ds_indicacao_clinica'),
                row[28].strip(),
                row[29].strip(),
                row[30].strip(),
                clean('partic_medico')
            ]

        table1 = etl.rowmap(table, rowmapper, header=[
            'cod_custo', 'fk_beneficiario', 'mes_competencia', 'nr_guia',
            'nr_guia_principal', 'dt_realizacao', 'fk_cid',
            'dt_ini_internacao', 'dt_fim_internacao', 'fk_medicosolicitante',
            'cod_executante', 'fk_local_atend', 'fk_capa_unico',
            'fk_destino_pgto', 'fk_servico', 'cod_servico_tipo', 'qtd',
            'qtd_unica', 'vl_unitario', 'vl_total', 'vl_coparticipacao',
            'ind_internacao', 'tipo_registro', 'tipo_atendimento',
            'tipo_guia', 'tipo_internacao', 'tipo_acomodacao',
            'ds_indicacao_clinica', 'vl_hm', 'vl_co', 'vl_filme',
            'partic_medico'
        ])
        table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
        etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')
        self.logger.info(f"File destination {self.client_to}")
        self.logger.info(f"Destination container {self.container_to}")
    except azure.core.exceptions.ResourceNotFoundError:
        print('Entered the exception handler :)')

    upload_file = f'{_PROC_FILES}/t{nm_arq}'
    try:
        if os.path.isfile(upload_file):
            with open(upload_file, "rb") as data:
                self.client_to.upload_blob(nm_arq, data, overwrite=True)
            self.logger.info(f'{upload_file} uploaded')
        else:
            self.logger.info(f't{nm_arq} was not found in the container')
    finally:
        self.logger.info('All uploaded')
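Every execute() above repeats the same strip-and-NULL-blank pattern field by field. A minimal sketch of how that could be factored into one rowmapper factory, assuming (as the named lookups above imply) that the input file's header matches the output header; make_rowmapper is a hypothetical helper, not part of the original code:

import petl as etl


def make_rowmapper(fields, nullable=frozenset()):
    # hypothetical factory: strips every field and blanks the literal
    # 'NULL' in the fields listed in `nullable`
    def rowmapper(row):
        out = []
        for name in fields:
            value = row[name].strip()
            if name in nullable and value == 'NULL':
                value = ''
            out.append(value)
        return out
    return rowmapper


# usage sketch against the SERVICOS layout above, where `table` is the
# pipe-delimited input loaded with etl.fromcsv
fields = ['pk_servico', 'tipo', 'descricao', 'capitulo', 'grupo',
          'subgrupo', 'dt_inclusao', 'dt_alteracao', 'ind_cirurgico']
table1 = etl.rowmap(
    table,
    make_rowmapper(fields, {'subgrupo', 'dt_alteracao', 'ind_cirurgico'}),
    header=fields)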