import os
import requests
from bs4 import BeautifulSoup
import re
import zipfile
import glob
import re
import matplotlib.pyplot as plt
import matplotlib.style as style

# Plot theme applied to the figures drawn by this module
# (the available themes are listed in style.available).
style.use('fivethirtyeight')

# ------------------------------------------------------------
# Load the table with the researchers' IDs and names.
df_idlist = readIdList()

# ------------------------------------------------------------
# Run every extraction step on each researcher's zip archive; the archive
# is named after the value in the first column of the ID table.
for nid, lattes_id in enumerate(df_idlist.iloc[:, 0]):
    zipfilename = str(lattes_id) + '.zip'
    getprojpesqext(zipfilename)
    getprodtec(zipfilename)
    getorient(zipfilename)
    getperiod(zipfilename)
    getlivro(zipfilename)
    getcapit(zipfilename)
    getnomecompleto(zipfilename)
    getdiscip(zipfilename)
def _grapho_config_year(config_line):
    """Return the number stored after the first ':' of a config line."""
    return float(config_line.split(':')[1].rstrip('\n').strip(' '))


def _grapho_author_set(raw_authors):
    """Turn a stringified author list into a set of upper-case names.

    Brackets and single quotes are removed, the text is split on commas and
    at most one leading blank is dropped from each name.
    """
    cleaned = raw_authors.upper()
    for char in "[]'":
        cleaned = cleaned.replace(char, '')
    return {name[1:] if name.startswith(' ') else name
            for name in cleaned.split(',')}


def getgrapho():
    """Draw the journal co-authorship graph of the listed researchers.

    Reads the first/last year from lines 5 and 6 (0-based) of
    ``./config.txt``, loads ``./csv_producao/periodicos_all.csv`` and every
    ``./csv_producao/*fullname.csv``, and for each ordered pair of
    researchers counts the papers of the second one whose author list
    contains the full name of the first one.

    Side effects:
        * prints a warning when projects or papers have no year ('VAZIO');
        * writes ``./csv_producao/periodicos_nointer.csv`` with the names of
          researchers that take part in no interaction;
        * saves the figure to ``./relatorio/figures/grapho.png``.

    NOTE(review): the ID comparisons against ``df_idlist`` assume that
    ``readIdList`` returns the IDs as strings - confirm.
    """
    df_idlist = readIdList()

    # Read the config file once and always close it.
    with open('./config.txt', 'r') as config_file:
        config_lines = config_file.readlines()
    yyi = _grapho_config_year(config_lines[5])
    yyf = _grapho_config_year(config_lines[6])

    # ------------------------------------------------------------
    # Data frames generated by the tidy step.
    dfppe_uniq = pd.read_csv('./csv_producao/projetos_uniq.csv', header=0)
    dfpaper = pd.read_csv('./csv_producao/periodicos_all.csv', header=0)
    dfpaper['ID'] = dfpaper['ID'].apply(ss)

    # Projects are only inspected to warn about missing start years.
    project_years = dfppe_uniq['YEAR_INI'].replace('VAZIO', -99)
    num_projects_noyear = int((project_years == -99).sum())
    if num_projects_noyear >= 1:
        print('------------------------------------------------------------')
        print('ATENCAO: ' + str(num_projects_noyear)
              + ' projetos sem ano inicial')
        print('------------------------------------------------------------')

    # Papers: warn about missing years, then keep only the configured range.
    paper_years = dfpaper['YEAR'].replace('VAZIO', -99)
    num_papers_noyear = int((paper_years == -99).sum())
    if num_papers_noyear >= 1:
        print('------------------------------------------------------------')
        print('ATENCAO: ' + str(num_papers_noyear)
              + ' artigos sem ano de publicacao')
        print('------------------------------------------------------------')
    dfpaper['YEAR'] = paper_years.apply(ff)
    dfpaper = dfpaper[(dfpaper['YEAR'] >= yyi) & (dfpaper['YEAR'] <= yyf)]

    # ------------------------------------------------------------
    # Personal data (ID in column 0, full name in column 1).
    # DataFrame.append was removed in pandas 2.0, so the files are
    # concatenated in a single call.
    fullname_frames = [
        pd.read_csv(csv_path, header=0, dtype='str')
        for csv_path in glob.glob('./csv_producao/*fullname.csv')
    ]
    if fullname_frames:
        dffullname = pd.concat(fullname_frames, ignore_index=True)
    else:
        dffullname = pd.DataFrame()
    # IDs as strings so they can be compared with dfpaper['ID'].
    dffullname['ID'] = dffullname['ID'].apply(ss)

    # Parse each paper's author list once, grouped by the owner's ID.
    # Column 7 holds the stringified author list (presumably 'AUTHOR' -
    # TODO confirm). Cells that are not strings (e.g. NaN) are skipped.
    author_sets_by_id = {}
    for owner_id, raw_authors in zip(dfpaper['ID'], dfpaper.iloc[:, 7]):
        if isinstance(raw_authors, str):
            author_sets_by_id.setdefault(owner_id, []).append(
                _grapho_author_set(raw_authors))

    # ------------------------------------------------------------
    # Interaction between every ordered pair of researchers.
    lsid = []
    lsid_tocompare = []
    lsinter_qtd = []
    for m in range(len(df_idlist)):
        idd = str(df_idlist.iloc[m, 0])
        lname = dffullname[dffullname['ID'] == idd].iloc[0, 1].upper()
        others = dffullname[dffullname['ID'] != idd]
        for idd_tocompare in others.iloc[:, 0]:
            papers = author_sets_by_id.get(idd_tocompare, ())
            lsid.append(idd)
            lsid_tocompare.append(idd_tocompare)
            lsinter_qtd.append(sum(1 for authors in papers
                                   if lname in authors))
    dfinterac = pd.DataFrame({
        'IDD': lsid,
        'IDD_COMP': lsid_tocompare,
        'WEIGHT': lsinter_qtd
    })

    # Researchers that appear in no interaction at all, in either direction.
    lsnointer_period = []
    for m in range(len(df_idlist)):
        idd = df_idlist.iloc[m, 0]
        as_source = dfinterac.loc[dfinterac['IDD'] == idd, 'WEIGHT'].sum()
        as_target = dfinterac.loc[dfinterac['IDD_COMP'] == idd,
                                  'WEIGHT'].sum()
        if as_source == 0 and as_target == 0:
            match = dffullname[dffullname['ID'] == idd].reset_index(drop=True)
            lsnointer_period.append(match.iloc[0, 1])
    dfnointerac = pd.DataFrame({'NOME': lsnointer_period})
    dfnointerac.to_csv('./csv_producao/periodicos_nointer.csv',
                       index=False, sep=',')

    # ------------------------------------------------------------
    # Graph: only pairs with at least one shared paper become edges.
    plt.figure(figsize=(12, 9.5))
    G = nx.Graph()
    for source, target, weight in zip(lsid, lsid_tocompare, lsinter_qtd):
        if weight != 0:
            # Each edge carries its own count (the previous code gave every
            # edge the weight of the first one).
            G.add_edge(source, target, weight=weight)
    pos = nx.spring_layout(G, k=1.75)

    # One colour per research group; the palette is cycled if there are
    # more groups than colours.
    colours = [
        '#5a7d9a', 'red', 'green', 'yellow', 'gray', 'orange', 'blue',
        'magenta', '#00555a', '#f7d560', 'cyan', '#b6b129', '#a1dd72',
        '#d49acb', '#d4a69a', '#977e93', '#a3cc72', '#c60acb', '#d4b22a',
        '#255e53', '#77525a', '#c7d511', '#c4c22b', '#c9b329', '#c8dd22',
        '#f75acb', '#b1a40a', '#216693', '#b1cd32', '#b33acb', '#c9a32b',
        '#925e11', '#c5dd39', '#d04205', '#d8a82a', '#373e29'
    ]
    dic_colours = {
        group: colours[position % len(colours)]
        for position, group in enumerate(df_idlist['GROUP'].unique())
    }

    # Colour and label of each node, taken from the researcher's row
    # (column 0: ID, column 1: label, column 2: group key).
    node_colours = []
    diclabel = {}
    for node in G.nodes():
        row = df_idlist[df_idlist['ID_LATTES'] == node]
        node_colours.append(dic_colours[row.iloc[0, 2]])
        diclabel[str(row.iloc[0, 0])] = row.iloc[0, 1]

    nx.draw_networkx_nodes(G, pos,
                           node_size=400,
                           node_shape='o',
                           node_color=node_colours,
                           alpha=0.7)
    nx.draw_networkx_edges(G, pos, width=1, edge_color='orange')
    nx.draw_networkx_labels(G, pos, labels=diclabel,
                            font_size=16, font_family='sans-serif')
    plt.axis('off')
    plt.tight_layout()
    plt.savefig('./relatorio/figures/grapho.png')
def _indprodart_drop_excess_b4b5(weighted):
    """Remove the B4/B5 rows of the years where they exceed 0.2 of the total.

    ``weighted`` needs the columns YEAR, QUALIS and PROD_AMOUPESO. For each
    year, the B4+B5 share of the summed PROD_AMOUPESO is computed; when the
    share is above 0.2 a message is printed and only that year's B4/B5 rows
    are discarded. All other rows are kept.
    """
    is_b4b5 = weighted['QUALIS'].isin(['B4', 'B5'])
    year_totals = weighted.groupby('YEAR')['PROD_AMOUPESO'].sum()
    b4b5_totals = weighted[is_b4b5].groupby('YEAR')['PROD_AMOUPESO'].sum()
    share = b4b5_totals / year_totals.reindex(b4b5_totals.index)
    glosa_years = list(share[share > 0.2].index)
    for year_b4b5 in glosa_years:
        print('Para o ano ', str(year_b4b5),
              'artigos B4 B5 glosados, maior que 0.2')
    return weighted[~(is_b4b5 & weighted['YEAR'].isin(glosa_years))]


def capes_indprodart():
    """Compute the IndProdArt index per quadrennium and save it as CSV.

    Reads ``./csv_producao/periodicos_uniq.csv``, discards papers whose
    QUALIS is "XX" or "C ", weights the yearly paper counts of each QUALIS
    class with ``fun_indprodart_classif``, divides each year's weighted sum
    by the number of researchers returned by ``readIdList`` and averages the
    yearly values inside each quadrennium (2013-2016, 2017-2020, ... up to
    2033-2036). Quadrennia without papers are skipped.

    Side effects: prints the number of discarded papers and any B4/B5 cut,
    and writes ``./csv_producao/capesindex_indprodart.csv``.
    """
    df_idlist = readIdList()
    num_dp = len(df_idlist)

    df = pd.read_csv('./csv_producao/periodicos_uniq.csv',
                     header=0, sep=',')
    num_period_tot = len(df['QUALIS'])
    # .copy() so the YEAR assignment below does not write into a view.
    df = df.query('QUALIS != "XX" and QUALIS != "C "').copy()
    df['YEAR'] = df['YEAR'].apply(iint)
    num_period_semqualis = num_period_tot - len(df['QUALIS'])
    print('Numero de periodicos sem QUALIS = ', num_period_semqualis)

    # Paper count per (year, QUALIS); it does not depend on the quadrennium,
    # so it is computed once.
    df_counts = df.groupby(['YEAR', 'QUALIS'])['TITLE'].count().reset_index()
    df_counts.columns = ['YEAR', 'QUALIS', 'AMOUNT']

    ls_yini_quad = []
    ls_yfin_quad = []
    ls_indprodart = []
    # Quadrennia start in 2013, 2017, ..., 2033.
    for yini in range(2013, 2034, 4):
        yfin = yini + 3
        df_qtdby_qualis = df_counts.query(
            'YEAR >= @yini and YEAR <= @yfin').copy()
        if len(df_qtdby_qualis) < 1:
            continue
        df_qtdby_qualis['PESO'] = df_qtdby_qualis['QUALIS'].apply(
            fun_indprodart_classif)
        df_qtdby_qualis['PROD_AMOUPESO'] = (df_qtdby_qualis['AMOUNT']
                                            * df_qtdby_qualis['PESO'])
        # B4 and B5 may represent at most 0.2 of each year's production.
        df_qtdby_qualis = _indprodart_drop_excess_b4b5(df_qtdby_qualis)
        yearly_index = df_qtdby_qualis.groupby(
            ['YEAR'])['PROD_AMOUPESO'].sum() / num_dp
        ls_indprodart.append(yearly_index.mean())
        ls_yini_quad.append(yini)
        ls_yfin_quad.append(yfin)

    df_indprodart = pd.DataFrame({'QUADRIENIO_INI': ls_yini_quad,
                                  'QUADRIENIO_FIM': ls_yfin_quad,
                                  'INDPRODART': ls_indprodart})
    pathfilename = './csv_producao/' + 'capesindex_indprodart' + '.csv'
    df_indprodart.to_csv(pathfilename, index=False)
    print(pathfilename, ' gravado com', len(df_indprodart), ' quadrienios')
def _distindproddp_drop_excess_b4b5(weighted):
    """Remove the B4/B5 rows of the years where they exceed 0.2 of the total.

    ``weighted`` needs the columns YEAR, QUALIS and PROD_AMOUPESO. The share
    is evaluated per year over all rows of that year; when it is above 0.2 a
    message is printed and only that year's B4/B5 rows are discarded.
    """
    is_b4b5 = weighted['QUALIS'].isin(['B4', 'B5'])
    year_totals = weighted.groupby('YEAR')['PROD_AMOUPESO'].sum()
    b4b5_totals = weighted[is_b4b5].groupby('YEAR')['PROD_AMOUPESO'].sum()
    share = b4b5_totals / year_totals.reindex(b4b5_totals.index)
    glosa_years = list(share[share > 0.2].index)
    for year_b4b5 in glosa_years:
        print('Para o ano ', str(year_b4b5),
              'artigos B4 B5 glosados, maior que 0.2')
    return weighted[~(is_b4b5 & weighted['YEAR'].isin(glosa_years))]


def capes_distindproddp():
    """Compute the DistIndProdDP index per quadrennium and save it as CSV.

    Reads ``./csv_producao/periodicos_uniq.csv``, discards papers whose
    QUALIS is "XX" or "C ", and for each quadrennium (2013-2016, 2017-2020,
    ... up to 2033-2036) builds one weighted production score per researcher
    (FULL_NAME), classified with ``fun_indori_classif``. The final index is
    the percentage of researchers, relative to the number returned by
    ``readIdList``, whose class is neither "FRACO" nor "DEFICIENTE".

    Side effects: prints progress messages and writes
    ``./csv_producao/capesindex_distindproddp_doce.csv`` (per researcher) and
    ``./csv_producao/capesindex_distindproddp.csv`` (per quadrennium).

    NOTE(review): each researcher's yearly score is divided by the total
    number of researchers, and the score is classified with
    ``fun_indori_classif`` - both are kept as found; confirm they are
    intended.
    """
    df_idlist = readIdList()
    num_dp = len(df_idlist)

    df = pd.read_csv('./csv_producao/periodicos_uniq.csv',
                     header=0, sep=',')
    num_period_tot = len(df['QUALIS'])
    # .copy() so the YEAR assignment below does not write into a view.
    df = df.query('QUALIS != "XX" and QUALIS != "C "').copy()
    df['YEAR'] = df['YEAR'].apply(iint)
    num_period_semqualis = num_period_tot - len(df['QUALIS'])
    print('Numero de periodicos sem QUALIS = ', num_period_semqualis)

    # Paper count per (year, researcher, QUALIS); computed once because it
    # does not depend on the quadrennium.
    df_counts = df.groupby(
        ['YEAR', 'FULL_NAME', 'QUALIS'])['TITLE'].count().reset_index()
    df_counts.columns = ['YEAR', 'FULL_NAME', 'QUALIS', 'AMOUNT']

    out_columns = ['QUADRIENIO', 'FULL_NAME', 'INDPRODART', 'CLASSIF']
    quadrennium_frames = []
    # Quadrennia start in 2013, 2017, ..., 2033.
    for yini in range(2013, 2034, 4):
        yfin = yini + 3
        df_qtdby_qualis = df_counts.query(
            'YEAR >= @yini and YEAR <= @yfin').copy()
        if len(df_qtdby_qualis) < 1:
            continue
        df_qtdby_qualis['PESO'] = df_qtdby_qualis['QUALIS'].apply(
            fun_indprodart_classif)
        df_qtdby_qualis['PROD_AMOUPESO'] = (df_qtdby_qualis['AMOUNT']
                                            * df_qtdby_qualis['PESO'])
        # B4 and B5 may represent at most 0.2 of each year's production.
        df_qtdby_qualis = _distindproddp_drop_excess_b4b5(df_qtdby_qualis)

        # Yearly score per researcher, then its mean over the quadrennium.
        df_year_name = (df_qtdby_qualis.groupby(
            ['YEAR', 'FULL_NAME'])['PROD_AMOUPESO'].sum()
            / num_dp).reset_index()
        df_year_name.columns = ['YEAR', 'FULL_NAME', 'INDPRODART']
        df_name = df_year_name.groupby(
            ['FULL_NAME'])['INDPRODART'].mean().reset_index()
        df_name['CLASSIF'] = df_name['INDPRODART'].apply(fun_indori_classif)
        df_name['QUADRIENIO'] = str(yini) + '-' + str(yfin)
        quadrennium_frames.append(df_name[out_columns])

    # Concatenate once; an empty frame with the expected columns is used
    # when no quadrennium has data.
    if quadrennium_frames:
        df_indprodart_full = pd.concat(quadrennium_frames, axis=0)
    else:
        df_indprodart_full = pd.DataFrame(columns=out_columns)

    pathfilename = './csv_producao/' + 'capesindex_distindproddp_doce.csv'
    df_indprodart_full.to_csv(pathfilename, index=False)
    print(pathfilename, ' gravado com', len(df_indprodart_full),
          ' pesquisadores para todos os quadrienios')

    # Share of researchers per class, keeping the classes other than
    # FRACO and DEFICIENTE.
    df_distindproddp = df_indprodart_full.groupby(
        ['QUADRIENIO', 'CLASSIF'])['FULL_NAME'].count().reset_index()
    df_distindproddp.columns = ['QUADRIENIO', 'CLASSIF', 'COUNT']
    df_distindproddp['DISTINDPRODDP'] = (100 * df_distindproddp['COUNT']
                                         / num_dp)
    df_distindproddp = df_distindproddp.query(
        'CLASSIF != "FRACO" and CLASSIF != "DEFICIENTE"')
    df_distindproddp = df_distindproddp.groupby(
        ['QUADRIENIO'])['DISTINDPRODDP'].sum().reset_index()
    pathfilename = './csv_producao/' + 'capesindex_distindproddp' + '.csv'
    df_distindproddp.to_csv(pathfilename, index=False)
    print(pathfilename, ' gravado com', len(df_distindproddp),
          ' quadrienios')
def capes_indautdis():
    """Compute the student co-authorship indexes per quadrennium.

    The programme name is read from line 8 (0-based) of ``./config.txt``.
    From ``./csv_producao/orientacoes_all.csv`` only master's dissertations
    and doctoral theses of that programme are kept, excluding
    co-supervisions. For each quadrennium (2013-2016, 2017-2020, ... up to
    2033-2036) the supervisions with YEAR between ``start - 1`` and
    ``end + 3`` are selected, and for each of them the papers in
    ``./csv_producao/periodicos_all.csv`` whose AUTHOR text contains both the
    last word of the student's name and the last word of the supervisor's
    name are counted.

    Output columns of ``./csv_producao/capesindex_indautdis.csv``:
        QUADRIENIO: "start-end" label;
        INDOUT: fraction of supervisions with at least one such paper;
        INDIS: mean number of such papers per supervision.

    Quadrennia without supervisions print a message and are left out.
    """
    # The handle is closed even if the expected line is missing.
    with open('./config.txt', 'r', encoding='utf-8') as config_file:
        name_ppg = config_file.readlines()[8].split(':')[1]
    name_ppg = fun_uppercase(name_ppg.rstrip('\n').strip(' '))

    # NOTE(review): the result is not used in this function; the call is
    # kept in case readIdList has side effects - confirm and drop.
    readIdList()

    df = pd.read_csv('./csv_producao/orientacoes_all.csv',
                     header=0, sep=',')
    df = df.query('NATURE == "Dissertação de mestrado" '
                  'or NATURE == "Tese de doutorado"')
    df = df.query('TYPE != "CO_ORIENTADOR"').reset_index(drop=True)
    for column in ('COURSE', 'STUDENT', 'FULL_NAME'):
        df[column] = df[column].apply(fun_uppercase)
    df = df.query('COURSE == @name_ppg')

    # Upper-cased author strings, loaded once and only when first needed.
    ls_authors = None
    rows = []
    # Quadrennia start in 2013, 2017, ..., 2033.
    for yini in range(2013, 2034, 4):
        yfin = yini + 3
        # The selection window is wider than the quadrennium itself.
        df_disc_quadri = df.query('YEAR >= @yini - 1 and YEAR <= @yfin + 3')
        if len(df_disc_quadri) < 1:
            print("sem orientacoes para o periodo")
            continue
        if ls_authors is None:
            df_period_all = pd.read_csv('./csv_producao/periodicos_all.csv',
                                        header=0, sep=',')
            ls_authors = list(
                df_period_all['AUTHOR'].apply(fun_uppercase))

        # Papers shared by each (student, supervisor) pair, matched by the
        # last word of both names.
        amounts = []
        for student, advisor in zip(df_disc_quadri['STUDENT'],
                                    df_disc_quadri['FULL_NAME']):
            zdis = student.split(' ')[-1]
            zdoc = advisor.split(' ')[-1]
            amounts.append(sum(1 for zaut in ls_authors
                               if zdis in zaut and zdoc in zaut))

        total = len(amounts)
        with_papers = sum(1 for amount in amounts if amount != 0)
        rows.append((str(yini) + '-' + str(yfin),
                     with_papers / total,
                     sum(amounts) / total))

    df_indiscente = pd.DataFrame(rows,
                                 columns=['QUADRIENIO', 'INDOUT', 'INDIS'])
    pathfilename = './csv_producao/' + 'capesindex_indautdis' + '.csv'
    df_indiscente.to_csv(pathfilename, index=False)
    print(pathfilename, ' gravado com', len(df_indiscente), ' quadrienios')
def capes_indori():
    """Compute the IndOri index per quadrennium and save it as CSV.

    The programme name is read from line 8 (0-based) of ``./config.txt``.
    From ``./csv_producao/orientacoes_all.csv`` only master's dissertations
    and doctoral theses of that programme are kept, excluding
    co-supervisions. For each year the number of supervised students per
    NATURE is weighted with ``fun_peso_defesa``, summed and divided by the
    number of researchers returned by ``readIdList``; the yearly values are
    averaged inside each quadrennium (2013-2016, 2017-2020, ... up to
    2033-2036), rounded to 3 decimals and classified with
    ``fun_indori_classif``. Quadrennia without supervisions are skipped.

    Side effects: writes ``./csv_producao/capesindex_indori.csv`` and prints
    the number of quadrennia written.
    """
    # The handle is closed even if the expected line is missing.
    with open('./config.txt', 'r', encoding='utf-8') as config_file:
        name_ppg = config_file.readlines()[8].split(':')[1]
    name_ppg = fun_uppercase(name_ppg.rstrip('\n').strip(' '))

    df_idlist = readIdList()
    num_dp = len(df_idlist)

    df = pd.read_csv('./csv_producao/orientacoes_all.csv',
                     header=0, sep=',')
    df = df.query('NATURE == "Dissertação de mestrado" '
                  'or NATURE == "Tese de doutorado"')
    df = df.query('TYPE != "CO_ORIENTADOR"').reset_index(drop=True)
    df['COURSE'] = df['COURSE'].apply(fun_uppercase)
    df = df.query('COURSE == @name_ppg')

    # Students per (year, nature); computed once because it does not depend
    # on the quadrennium. The count column keeps the name STUDENT.
    df_counts = df.groupby(['YEAR', 'NATURE'])['STUDENT'].count().reset_index()

    ls_yini_quad = []
    ls_yfin_quad = []
    ls_indori = []
    # Quadrennia start in 2013, 2017, ..., 2033.
    for yini in range(2013, 2034, 4):
        yfin = yini + 3
        df_qtdby_yradv = df_counts.query(
            'YEAR >= @yini and YEAR <= @yfin').copy()
        if len(df_qtdby_yradv) < 1:
            continue
        df_qtdby_yradv['PESO_DEF'] = df_qtdby_yradv['NATURE'].apply(
            fun_peso_defesa)
        df_qtdby_yradv['PROD_STUPESO'] = (df_qtdby_yradv['STUDENT']
                                          * df_qtdby_yradv['PESO_DEF'])
        yearly_index = df_qtdby_yradv.groupby(
            ['YEAR'])['PROD_STUPESO'].sum() / num_dp
        ls_yini_quad.append(yini)
        ls_yfin_quad.append(yfin)
        ls_indori.append(round(yearly_index.mean(), 3))

    df_indori = pd.DataFrame({'QUADRIENIO_INI': ls_yini_quad,
                              'QUADRIENIO_FIM': ls_yfin_quad,
                              'INDORI': ls_indori})
    df_indori['INDORI_CLASSIFICACAO'] = df_indori['INDORI'].apply(
        fun_indori_classif)
    pathfilename = './csv_producao/' + 'capesindex_indori' + '.csv'
    df_indori.to_csv(pathfilename, index=False)
    print(pathfilename, ' gravado com', len(df_indori), ' quadrienios')