Python read_data示例，parser.read_data Python示例

示例#1

0

显示文件

文件： inactive_members_parser.py 项目： dadosjusbr/coletores

def parse_jan_to_april_aug_19(file_name):
    """Parse one remuneration spreadsheet into employee records.

    The sheet is loaded with ``parser.read_data`` and the rows from the index
    given by ``parser.get_begin_row`` up to the one given by
    ``parser.get_end_row`` are converted, one record per row.

    Args:
        file_name: Path of the spreadsheet. It is also handed to
            ``parser.type_employee`` and checked for the substring
            "inativos" to decide the "active" flag.

    Returns:
        dict mapping each registration number (as a string) to the record
        built from its row.
    """
    sheet = parser.read_data(file_name).to_numpy()
    first_row = parser.get_begin_row(sheet)
    last_row = parser.get_end_row(sheet, first_row, file_name)

    employee_type = parser.type_employee(file_name)
    is_active = "inativos" not in file_name

    employees = {}
    for index, row in enumerate(sheet):
        # Everything above the first data row is skipped.
        if index < first_row:
            continue

        reg = str(int(row[0]))  # int() drops the '.0' before stringifying
        name = row[1].strip()  # some cells carry surrounding blanks
        role = row[2]
        workplace = row[3]
        base_salary = format_value(row[4])
        # Other remuneratory amounts, legal or judicial.
        other_remunerations = format_value(row[5])
        christmas_bonus = format_value(row[6])  # 13th salary
        discounts_total = format_value(row[11])
        ceiling_retention = format_value(row[10])  # constitutional ceiling
        pension_contribution = format_value(row[8])
        income_tax = format_value(row[12])

        wage = base_salary + other_remunerations
        gross_total = wage + christmas_bonus

        income = {
            "total": round(gross_total, 2),
            "wage": wage,
            "other": {  # bonuses
                "total": christmas_bonus,
                "others_total": christmas_bonus,
                "others": {
                    "Gratificação Natalina": christmas_bonus,
                },
            },
        }
        # The spreadsheet stores discounts as negative numbers; abs() keeps
        # the stored values positive.
        discounts = {
            "total": abs(discounts_total),
            "prev_contribution": abs(pension_contribution),
            "ceil_retention": abs(ceiling_retention),
            "income_tax": abs(income_tax),
        }

        employees[reg] = {
            "reg": reg,
            "name": name,
            "role": role,
            "type": employee_type,
            "workplace": workplace,
            "active": is_active,
            "income": income,
            "discounts": discounts,
        }

        # Stop once the next index would be past the last data row.
        if index + 1 > last_row:
            break
    return employees

示例#2

0

显示文件

文件： parser_april20_backward.py 项目： dadosjusbr/coletores

def update_employee_indemnity(file_name, employees):
    """Add the indemnity ("perks") values of a spreadsheet to known employees.

    Rows are read with ``parser.read_data``; the data range is delimited by
    the rows located through the "Matrícula" and "TOTAL" markers. For every
    row whose registration number is already a key of ``employees`` the
    record's ``income["total"]`` is increased by the row's indemnities and
    ``income["perks"]`` is replaced with the detailed values.

    Args:
        file_name: Path of the indemnity spreadsheet.
        employees: dict of employee records keyed by registration number
            (string). It is updated in place.

    Returns:
        The same ``employees`` dict.
    """
    rows = parser.read_data(file_name).to_numpy()
    begin_string = "Matrícula"  # word before starting data
    end_string = "TOTAL"
    begin_row = parser.get_begin_row(rows, begin_string)
    end_row = parser.get_end_row(rows, begin_row, end_string)

    for index, row in enumerate(rows):
        if index < begin_row:
            continue

        # str() leaves an existing str untouched and converts anything else.
        matricula = str(row[1])

        if matricula in employees:
            vale_alimentacao = format_value(row[5])
            auxilio_alimentacao = format_value(row[6])
            auxilio_creche = format_value(row[7])
            transporte = format_value(row[8])
            saude = format_value(row[9])
            moradia = format_value(row[10])
            total_indenizacoes = (
                vale_alimentacao
                + auxilio_alimentacao
                + auxilio_creche
                + transporte
                + moradia
                + saude
            )

            # The record is mutated in place, so no write-back is needed.
            income = employees[matricula]["income"]
            income["total"] = round(income["total"] + total_indenizacoes, 2)
            income["perks"] = {
                "total": round(total_indenizacoes, 2),
                "food": vale_alimentacao + auxilio_alimentacao,
                "pre_school": auxilio_creche,
                "transportation": transporte,
                "housing_aid": moradia,
                "health": saude,
            }

        # The position check must run for every row, matched or not;
        # otherwise the end-of-data stop drifts away from the real row index.
        if index + 1 > end_row:
            break
    return employees

示例#3

0

显示文件

文件： inactive_servants_indemnity_parser.py 项目： dadosjusbr/coletores

def update_employee_indemnity_dec_2020(file_name, employees):
    """Merge the December 2020 indemnity sheet into existing employee records.

    Every data row (between the indexes given by ``parser.get_begin_row`` and
    ``parser.get_end_row``) must match a key of ``employees``; a missing
    registration number raises ``KeyError``.

    Args:
        file_name: Path of the indemnity spreadsheet.
        employees: dict of employee records keyed by registration number
            (string); records must already hold ``income["perks"]`` and
            ``income["other"]``. Updated in place.

    Returns:
        The same ``employees`` dict.
    """
    sheet = parser.read_data(file_name).to_numpy()
    first_row = parser.get_begin_row(sheet)
    last_row = parser.get_end_row(sheet, first_row, file_name)

    for index, row in enumerate(sheet):
        if index < first_row:
            continue

        reg = str(int(row[0]))  # int() drops the '.0' before stringifying
        food = format_value(row[4])
        transport = format_value(row[5])  # transportation allowance
        vacation_cash = format_value(row[6])  # vacation paid in cash
        premium_leave_cash = format_value(row[7])  # premium leave paid in cash
        unhealthy_premium = format_value(row[8])  # unhealthy-conditions premium

        record = employees[reg]
        income = record["income"]

        # NOTE(review): the perks total counts only food + transportation
        # although the two cash values are stored next to it, and
        # income["total"] is not touched here - confirm both are intended.
        perks = income["perks"]
        perks_total = food + transport
        perks["total"] = perks_total
        perks["food"] = food
        perks["transportation"] = transport
        perks["vacation_pecuniary"] = vacation_cash
        perks["premium_license_pecuniary"] = premium_leave_cash

        other = income["other"]
        new_total = round(other["total"] + unhealthy_premium, 2)
        new_others_total = round(other["others_total"] + unhealthy_premium, 2)
        other["total"] = new_total
        other["others_total"] = new_others_total

        other["others"]["INSALUBRIDADE"] = unhealthy_premium

        employees[reg] = record

        # Stop once the next index would be past the last data row.
        if index + 1 > last_row:
            break

    return employees

示例#4

0

显示文件

文件： load.py 项目： cayman/decision

        sector = {
            'id': sector_id,
            'name': name[parser.SECTOR],
            'companies': int(name[parser.SECTOR_COUNT]),
            'files': 1
        }
        sectors[name[parser.SECTOR]] = sector
        cursor.execute("""INSERT INTO fa_sector
            VALUES (%s,%s)""", (sector['id'], sector['name']))
    else:
        sector = sectors[name[parser.SECTOR]]
        sector['files'] += 1

    #print(name)
    data = parser.read_data(os.path.join(xsl_config['files'], file),
                            name[parser.COMPANY_NAME], name[parser.CURRENCY],
                            name[parser.YEAR_MIN], name[parser.YEAR_MAX])

    company = {
        'file': file,
        'id': name[parser.COMPANY_ID],
        'name': name[parser.COMPANY_NAME],
        'sector_id': sector['id'],
        'sector_name': sector['name'],
        'count': name[parser.SECTOR_COUNT],
        'data': data
    }

    if company['name'] not in companies:
        companies.append(company['name'])
        cursor.execute(

示例#5

0

显示文件

文件： inactive_members_parser.py 项目： dadosjusbr/coletores

def parse_may_19(file_name):
    """Parse the May 2019 remuneration spreadsheet into employee records.

    The sheet is loaded with ``parser.read_data`` and the rows from the index
    given by ``parser.get_begin_row`` up to the one given by
    ``parser.get_end_row`` are converted, one record per row.

    Args:
        file_name: Path of the spreadsheet. It is also handed to
            ``parser.type_employee`` and checked for the substring
            "inativos" to decide the "active" flag.

    Returns:
        dict mapping each registration number (as a string) to the record
        built from its row.
    """
    sheet = parser.read_data(file_name).to_numpy()
    first_row = parser.get_begin_row(sheet)
    last_row = parser.get_end_row(sheet, first_row, file_name)

    employee_type = parser.type_employee(file_name)
    is_active = "inativos" not in file_name

    employees = {}
    for index, row in enumerate(sheet):
        # Everything above the first data row is skipped.
        if index < first_row:
            continue

        reg = str(int(row[0]))  # int() drops the '.0' before stringifying
        name = row[1].strip()  # some cells carry surrounding blanks
        role = row[2]
        workplace = row[3]
        base_salary = format_value(row[4])
        # Other remuneratory amounts, legal or judicial.
        other_remunerations = format_value(row[5])
        # Position of trust or commissioned post.
        trust_position = format_value(row[6])
        christmas_bonus = format_value(row[7])  # 13th salary
        vacation = format_value(row[8])  # constitutional one-third
        permanence = format_value(row[9])  # permanence allowance
        pension_contribution = format_value(row[11])
        income_tax = format_value(row[12])
        ceiling_retention = format_value(row[13])  # constitutional ceiling
        discounts_total = format_value(row[14])
        food = format_value(row[16])  # food allowance
        vacation_cash = format_value(row[17])  # vacation paid in cash

        perks_total = food + vacation_cash
        # NOTE(review): ``vacation`` is listed under "others" below but is
        # not part of this sum nor of "others_total" - confirm intent.
        bonuses_total = christmas_bonus + trust_position + permanence
        gross_total = (bonuses_total + perks_total + base_salary +
                       other_remunerations)

        income = {
            "total": round(gross_total, 2),
            "wage": base_salary + other_remunerations,
            "perks": {
                "total": perks_total,
                "food": food,
                "ferias em pecunia": vacation_cash,
            },
            "other": {  # bonuses
                "total": bonuses_total,
                "trust_position": trust_position,
                "others_total": christmas_bonus + permanence,
                "others": {
                    "Gratificação Natalina": christmas_bonus,
                    "Férias (1/3 constitucional)": vacation,
                    "Abono de Permanência": permanence,
                },
            },
        }
        # The spreadsheet stores discounts as negative numbers; abs() keeps
        # the stored values positive.
        discounts = {
            "total": abs(discounts_total),
            "prev_contribution": abs(pension_contribution),
            "income_tax": abs(income_tax),
            "ceil_retention": abs(ceiling_retention),
        }

        employees[reg] = {
            "reg": reg,
            "name": name,
            "role": role,
            "type": employee_type,
            "workplace": workplace,
            "active": is_active,
            "income": income,
            "discounts": discounts,
        }

        # Stop once the next index would be past the last data row.
        if index + 1 > last_row:
            break

    return employees

示例#6

0

显示文件

import string
import parser
# Demo script: parse one exported company workbook and list the parameter
# names it contains together with their measurement units.
dir = '/media/sf_cloud/Инвестиции/Export/'
file = 'Пищевая промышленность_6/157_Красный Октябрь_U_1996_2016.xls'

print('=>', file)
name = parser.parse_name(file)

data = parser.read_data(
    dir + file,
    name[parser.COMPANY_NAME],
    name[parser.CURRENCY],
    name[parser.YEAR_MIN],
    name[parser.YEAR_MAX],
)

names = {}  # parameter name -> "unit(quantity)" label, first occurrence wins
units = []  # distinct non-empty units, in order of first appearance
for line in data:
    key = line['name']
    if key not in names:
        measure = line['unit']
        quantity = line['quantity']
        suffix = '(' + quantity + ')' if quantity else ''
        names[key] = '{0}{1}'.format(measure, suffix)
    measure = line['unit']
    if measure and measure not in units:
        units.append(measure)

print('Параметры:')
# Only parameters whose label carries the "(тыс.)" quantity are listed.
for name, unit in names.items():
    if '(тыс.)' not in unit:
        continue
    print('{0}: {1}'.format(name, unit))

print('Единицы измерения:')
for unit in units:
    print('{0}'.format(unit))

示例#7

0

显示文件

        c_mat = c_mat + new_part
    c_mat = c_mat / data.shape[0]

    return c_mat


def get_eigenvalues(matrix):
    """Return (eigenvalue, eigenvector) pairs sorted by eigenvalue, largest first.

    Args:
        matrix: Square matrix accepted by ``numpy.linalg.eig``.

    Returns:
        list of ``(value, vector)`` tuples in descending order of ``value``;
        ``vector`` is the column of the eigenvector matrix at the same
        position as ``value`` in the eigenvalue array.
    """
    values, vectors = numpy.linalg.eig(matrix)
    # Column i of ``vectors`` is paired with ``values[i]``.
    pairs = [(values[i], vectors[:, i]) for i in range(values.shape[0])]
    return sorted(pairs, key=lambda pair: pair[0], reverse=True)


if __name__ == "__main__":
    # Load the data set, build its covariance matrix and print the largest
    # eigenvalues.
    data_file = 'p4-data.txt'
    data = read_data(data_file)
    print("data shape: {}".format(data.shape))

    cov_matrix = get_covariance_matrix(data)

    eigens = get_eigenvalues(cov_matrix)

    num_to_show = 10
    print("Top {} Eigen Values in decreasing order:".format(num_to_show))
    # Slicing instead of indexing 0..num_to_show-1 avoids an IndexError when
    # the matrix yields fewer than num_to_show eigenvalues.
    for e_val, _ in eigens[:num_to_show]:
        print(e_val.real)

示例#8

0

显示文件

    return k_centers, sses


def make_plot(sses):
    """Plot the total SSE of all clusters against the iteration number.

    The figure is saved as ``kmeans.png`` and then displayed.

    Args:
        sses: Sequence of total-SSE values; element ``i`` is drawn at
            x = ``i + 1``.
    """
    import matplotlib.pyplot as plt

    # The x axis is 1-based.
    iterations = [step + 1 for step in range(len(sses))]
    plt.plot(iterations, sses)
    plt.title('Sum of SSE of all Clusters over iterations ')
    plt.xlabel('Iterations')
    plt.ylabel('Total Sum of SSE')
    plt.tight_layout()
    plt.savefig('kmeans.png')
    plt.show()


if __name__ == "__main__":
    # Command-line entry point: cluster the given data file into k groups,
    # print the resulting means and plot the SSE per iteration.
    import argparse

    argparser = argparse.ArgumentParser()
    argparser.add_argument("data", help="Data to cluster")
    # type=int lets argparse reject a non-integer k with a usage message
    # instead of failing later with a ValueError traceback.
    argparser.add_argument("k", type=int, help="K for K-Means clustering")
    args = argparser.parse_args()

    data = read_data(args.data)

    means, sses = find_means(data, args.k)

    for mean in means:
        print(mean)

    make_plot(sses)

示例#9

0

显示文件

文件： inactive_servants_indemnity_parser.py 项目： dadosjusbr/coletores

def update_employee_indemnity_sept_to_nov_2020(file_name, employees):
    """Merge a Sept-Nov 2020 indemnity sheet into existing employee records.

    Every data row (between the indexes given by ``parser.get_begin_row`` and
    ``parser.get_end_row``) must match a key of ``employees``; a missing
    registration number raises ``KeyError``.

    Args:
        file_name: Path of the indemnity spreadsheet.
        employees: dict of employee records keyed by registration number
            (string); records must already hold ``income["perks"]`` and
            ``income["other"]``. Updated in place.

    Returns:
        The same ``employees`` dict.
    """
    sheet = parser.read_data(file_name).to_numpy()
    first_row = parser.get_begin_row(sheet)
    last_row = parser.get_end_row(sheet, first_row, file_name)

    for index, row in enumerate(sheet):
        if index < first_row:
            continue

        reg = str(int(row[0]))  # int() drops the '.0' before stringifying
        food = format_value(row[4])
        transport = format_value(row[5])  # transportation allowance
        unhealthy_premium = format_value(row[6])  # unhealthy-conditions premium
        qualification_bonus = format_value(row[7])  # qualification bonus

        record = employees[reg]
        income = record["income"]

        # All four values of the row are added to the overall income total.
        income["total"] = round(
            income["total"] + food + transport + unhealthy_premium +
            qualification_bonus,
            2,
        )

        perks = income["perks"]
        perks_total = food + transport
        perks["total"] = perks_total
        perks["food"] = food
        perks["transportation"] = transport

        # Both bonus totals grow by the two bonus values of the row.
        other = income["other"]
        new_total = round(
            other["total"] + unhealthy_premium + qualification_bonus, 2)
        new_others_total = round(
            other["others_total"] + unhealthy_premium + qualification_bonus,
            2)
        other["total"] = new_total
        other["others_total"] = new_others_total

        details = other["others"]
        details["INSALUBRIDADE"] = unhealthy_premium
        details["QUALIFICACAO"] = qualification_bonus

        employees[reg] = record

        # Stop once the next index would be past the last data row.
        if index + 1 > last_row:
            break

    return employees

示例#10

0

显示文件

文件： parser_april20_backward.py 项目： dadosjusbr/coletores

def parse_employees(file_name):
    """Parse a members remuneration spreadsheet into employee records.

    The data range is delimited by the rows located through the "Matrícula"
    and "TOTAL" markers. Every record is created with type "membro" and
    ``active`` set to True.

    Args:
        file_name: Path of the spreadsheet read with ``parser.read_data``.

    Returns:
        dict mapping each registration number (as a string) to the record
        built from its row.
    """
    sheet = parser.read_data(file_name).to_numpy()
    first_row = parser.get_begin_row(sheet, "Matrícula")
    last_row = parser.get_end_row(sheet, first_row, "TOTAL")

    employees = {}
    for index, row in enumerate(sheet):
        if index < first_row:
            continue

        # str() returns an existing str unchanged and converts anything else.
        reg = str(row[1])
        name = row[2]
        role = row[3]
        workplace = row[5]
        base_pay = format_value(row[6])
        other_amounts = format_value(row[7])
        # Position of trust or commissioned post.
        trust_position = format_value(row[8])
        christmas_bonus = format_value(row[9])
        vacation = format_value(row[10])
        permanence = format_value(row[11])  # permanence allowance
        pension_contribution = format_value(row[13])
        income_tax = format_value(row[14])
        ceiling_retention = format_value(row[15])  # constitutional ceiling
        discounts_total = pension_contribution + income_tax + ceiling_retention
        temporary_amounts = format_value(row[19])

        bonuses_total = (christmas_bonus + vacation + permanence +
                         trust_position + temporary_amounts)
        # Basic remuneration = base pay + other remuneratory amounts
        # (legal or judicial).
        wage = base_pay + other_amounts
        gross_total = wage + bonuses_total

        other = {  # bonuses
            "total": bonuses_total,
            "trust_position": trust_position,
            "others_total": (christmas_bonus + vacation + permanence +
                             temporary_amounts),
            "others": {
                "Gratificação Natalina": christmas_bonus,
                "Férias (1/3 constitucional)": vacation,
                "Abono de Permanência": permanence,
                "Outras Remunerações Temporárias": temporary_amounts,
            },
        }
        # Discount values are stored with the sign they were read with; no
        # abs() is applied in this parser.
        discounts = {
            "total": round(discounts_total, 2),
            "prev_contribution": pension_contribution,
            "ceil_retention": ceiling_retention,
            "income_tax": income_tax,
        }

        employees[reg] = {
            "reg": reg,
            "name": name,
            "role": role,
            "type": "membro",
            "workplace": workplace,
            "active": True,
            "income": {
                "total": gross_total,
                "wage": wage,
                "other": other,
            },
            "discounts": discounts,
        }

        # Stop once the next index would be past the last data row.
        if index + 1 > last_row:
            break

    return employees

示例#11

0

显示文件

def update_employee_indemnity_sept_2019_to_jan_and_nov_2020(
        file_name, employees):
    """Merge an indemnity sheet (Sept 2019 - Jan 2020, Nov 2020) into records.

    Only registration numbers already present in ``employees`` are updated;
    other rows are read and ignored.

    Args:
        file_name: Path of the indemnity spreadsheet.
        employees: dict of employee records keyed by registration number
            (string); matched records must already hold ``income["perks"]``
            and ``income["other"]``. Updated in place.

    Returns:
        The same ``employees`` dict.
    """
    rows = parser.read_data(file_name).to_numpy()
    begin_row = parser.get_begin_row(rows)
    end_row = parser.get_end_row(rows, begin_row, file_name)

    for index, row in enumerate(rows):
        if index < begin_row:
            continue

        matricula = str(int(row[0]))  # int() drops the '.0' before stringifying
        alimentacao = format_value(row[4])
        ferias_pc = format_value(row[5])
        licensa_pc = format_value(row[6])
        cumulativa = format_value(row[7])  # cumulative bonus
        grat_natureza = format_value(row[8])  # special-nature bonus
        atuacao_especial = format_value(row[9])  # special action group bonus

        # Only servants present in the monthly remuneration sheet are updated.
        if matricula in employees:
            income = employees[matricula]["income"]
            other = income["other"]

            total_outras_gratificacoes = round(
                other["others_total"] + cumulativa + grat_natureza +
                atuacao_especial,
                2,
            )
            total_gratificacoes = round(
                other["total"] + cumulativa + grat_natureza +
                atuacao_especial,
                2,
            )

            # The overall total must grow by the same three bonuses that are
            # added to the bonus totals above; atuacao_especial used to be
            # left out here, leaving income["total"] short of its parts.
            income["total"] = round(
                income["total"] + cumulativa + grat_natureza +
                atuacao_especial,
                2,
            )

            income["perks"].update({
                "total": round(ferias_pc + alimentacao + licensa_pc, 2),
                "food": alimentacao,
                "vacation_pecuniary": ferias_pc,
                "premium_license_pecuniary": licensa_pc,
            })

            other.update({
                "total": total_gratificacoes,
                "others_total": total_outras_gratificacoes,
            })

            other["others"].update({
                "GRAT. CUMULATIVA": cumulativa,
                "GRAT. NATUREZA ESPECIAL": grat_natureza,
                "GRAT. DE GRUPO DE ATUAÇÃO ESPECIAL": atuacao_especial,
            })

        # Stop once the next index would be past the last data row.
        if index + 1 > end_row:
            break

    return employees

示例#12

0

显示文件

def update_employee_indemnity_aug_sept_2020(file_name, employees):
    """Merge an Aug/Sept 2020 indemnity sheet into existing employee records.

    Only registration numbers already present in ``employees`` are updated;
    other rows are read and ignored.

    Args:
        file_name: Path of the indemnity spreadsheet.
        employees: dict of employee records keyed by registration number
            (string); matched records must already hold ``income["perks"]``
            and ``income["other"]``. Updated in place.

    Returns:
        The same ``employees`` dict.
    """
    sheet = parser.read_data(file_name).to_numpy()
    first_row = parser.get_begin_row(sheet)
    last_row = parser.get_end_row(sheet, first_row, file_name)

    for index, row in enumerate(sheet):
        if index < first_row:
            continue

        reg = str(int(row[0]))  # int() drops the '.0' before stringifying
        food = format_value(row[4])
        transport = format_value(row[5])  # transportation allowance
        pre_school = format_value(row[6])  # day-care allowance
        vacation_cash = format_value(row[7])
        premium_leave_cash = format_value(row[8])  # leave paid in cash
        compensatory_leave = format_value(row[9])  # act 1124/18
        unhealthy_premium = format_value(row[10])  # unhealthy-conditions premium
        role_substitution = format_value(row[11])
        vehicle = format_value(row[12])
        cumulative = format_value(row[13])  # cumulative bonus
        qualification = format_value(row[14])
        special_nature = format_value(row[15])  # special-nature bonus
        special_group = format_value(row[16])  # special action group bonus

        # Only servants present in the monthly remuneration sheet are updated.
        if reg in employees:
            record = employees[reg]
            income = record["income"]
            other = income["other"]

            # The same seven bonuses, in this order, are added to each of
            # the three running totals.
            bonuses = (cumulative, special_nature, special_group,
                       qualification, vehicle, unhealthy_premium,
                       role_substitution)

            others_sum = other["others_total"]
            for amount in bonuses:
                others_sum = others_sum + amount
            others_sum = round(others_sum, 2)

            other_sum = other["total"]
            for amount in bonuses:
                other_sum = other_sum + amount
            other_sum = round(other_sum, 2)

            income_sum = income["total"]
            for amount in bonuses:
                income_sum = income_sum + amount
            income["total"] = round(income_sum, 2)

            perks_total = round(
                vacation_cash + food + transport + pre_school +
                compensatory_leave + premium_leave_cash,
                2,
            )
            perks = income["perks"]
            perks["total"] = perks_total
            perks["food"] = food
            perks["transportation"] = transport
            perks["pre_school"] = pre_school
            perks["vacation_pecuniary"] = vacation_cash
            perks["premium_license_pecuniary"] = premium_leave_cash
            perks["compensatory_leave"] = compensatory_leave

            other["total"] = other_sum
            other["others_total"] = others_sum

            details = other["others"]
            details["INSALUBRIDADE"] = unhealthy_premium
            details["SUBS. DE FUNÇÃO"] = role_substitution
            details["VIATURA"] = vehicle
            details["GRAT. CUMULATIVA"] = cumulative
            details["GRAT. DE QUALIFICAÇÃO"] = qualification
            details["GRAT. NATUREZA ESPECIAL"] = special_nature
            details["GRAT. DE GRUPO DE ATUAÇÃO ESPECIAL"] = special_group

            employees[reg] = record

        # Stop once the next index would be past the last data row.
        if index + 1 > last_row:
            break

    return employees

示例#13

0

显示文件

文件： parser_may20_forward.py 项目： dadosjusbr/coletores

def update_employee_indemnity(file_name, employees):
    """Add indemnities and extra bonuses of a spreadsheet to known employees.

    The data range is delimited by the rows located through the "Matrícula"
    and "TOTAL" markers. For every row whose registration number is already
    a key of ``employees``, the record's income total and bonus totals are
    increased and ``income["perks"]`` is replaced with the detailed values.

    Args:
        file_name: Path of the indemnity spreadsheet.
        employees: dict of employee records keyed by registration number
            (string); matched records must already hold ``income["other"]``.
            Updated in place.

    Returns:
        The same ``employees`` dict.
    """
    rows = parser.read_data(file_name).to_numpy()

    begin_string = "Matrícula"  # word before starting data
    end_string = "TOTAL"
    begin_row = parser.get_begin_row(rows, begin_string)
    end_row = parser.get_end_row(rows, begin_row, end_string)

    for index, row in enumerate(rows):
        if index < begin_row:
            continue

        # str() leaves an existing str untouched and converts anything else.
        matricula = str(row[1])
        if matricula in employees:
            auxilio_alimentacao = format_value(row[5])
            auxilio_creche = format_value(row[6])
            saude = format_value(row[7])
            transporte = format_value(row[8])
            indenizacoes = format_value(row[9])
            indenizacoes_diligencias = format_value(row[10])
            periculosidade_insalubridade = format_value(row[11])
            gratificacoes = format_value(row[12])
            # NOTE(review): row[13] (other remunerations) is not folded into
            # any total here - confirm whether that is intended.

            total_indenizacoes = (auxilio_alimentacao + auxilio_creche +
                                  transporte + saude + indenizacoes +
                                  indenizacoes_diligencias)

            # The record is mutated in place and stays under its string key.
            # Writing it back under the raw row[1] cell would add a second,
            # non-string key whenever that cell is not a str.
            income = employees[matricula]["income"]
            other = income["other"]

            income["total"] = round(
                income["total"] + total_indenizacoes +
                periculosidade_insalubridade + gratificacoes, 2)

            income["perks"] = {
                "total": round(total_indenizacoes, 2),
                "food": auxilio_alimentacao,
                "pre_school": auxilio_creche,
                "transportation": transporte,
                "health": saude,
                "indemnities": indenizacoes,
                "indemnities_diligences": indenizacoes_diligencias,
            }

            other.update({
                "others_total":
                round(
                    other["others_total"] + periculosidade_insalubridade +
                    gratificacoes, 2),
                "total":
                round(
                    other["total"] + periculosidade_insalubridade +
                    gratificacoes, 2),
            })
            other["others"].update({
                'Insalubridade 10%': periculosidade_insalubridade,
                'Gratificações': gratificacoes,
            })

        # The position check must run for every row, matched or not;
        # otherwise the end-of-data stop drifts away from the real row index.
        if index + 1 > end_row:
            break
    return employees