def clean_df_sent(dfSent):
    """Drop unused columns and convert the code/wage columns of a "sent" frame.

    Steps, in order:

    1. Drop the 'wage_dec', 'emp_id', 'est_id' and 'year' columns.
    2. Call ``left_df(dfSent, 'cbo', 4, 'cbo4')`` and overwrite 'cbo' with
       ``to_number`` applied to the resulting 'cbo4' column.
    3. Create 'munic' from the first column of the frame, run it through
       ``to_number`` and cast it to float64.
    4. Run 'wage' through ``to_number`` and cast it to float64.

    Args:
        dfSent: pandas DataFrame containing at least the columns named above
            plus 'cbo' and 'wage'.

    Returns:
        The cleaned DataFrame.

    Raises:
        Whatever ``DataFrame.drop`` raises when one of the dropped columns is
        missing (no ``errors=`` override is passed).
    """
    # NOTE: 'cnae' is intentionally not in this list; it only ever appeared
    # here as a commented-out entry.
    dfSent = dfSent.drop(['wage_dec', 'emp_id', 'est_id', 'year'], axis=1)

    # Presumably left_df stores the leftmost 4 characters of 'cbo' in a new
    # 'cbo4' column -- confirm against left_df's definition.
    dfSent = left_df(dfSent, 'cbo', 4, 'cbo4')

    # Convert column-wise: only one column is needed per conversion, so there
    # is no reason to materialise every row of the frame (axis=1 apply).
    dfSent['cbo'] = dfSent['cbo4'].apply(to_number)

    # The municipality code is taken from whatever column currently sits
    # first in the frame.
    first_column = dfSent.columns[0]
    dfSent['munic'] = dfSent[first_column].apply(to_number).astype(np.float64)

    dfSent['wage'] = dfSent['wage'].apply(to_number).astype(np.float64)
    return dfSent
def openNFe(entrada, delimiter):
    """Load an NFe extract and keep sender CNAE, sender municipality and value.

    The input file is read with the following column layout, in order:

        TransactedProduct_ID_NCM, TransactedProduct_ID_HS,
        EconomicAtivity_ID_CNAE_Receiver, EconomicAtivity_ID_CNAE_Sender,
        CFOP_ID, CFOP_Reclassification, CFOP_Flow, Receiver_Type,
        Sender_Type, Municipality_ID_Receiver, Municipality_ID_Sender,
        Year, Monthly, Receiver_Situation, Sender_Situation, Cost_Value,
        ICMS_ST_Value, ICMS_ST_RET_Value, ICMS_Value, IPI_Value, PIS_Value,
        COFINS_Value, II_Value, Product_Value, ISSQN_Value, Origin

    Args:
        entrada: input file, forwarded as-is to ``read_from_csv``.
        delimiter: field delimiter, forwarded to ``read_from_csv``.

    Returns:
        The result of ``left_df`` applied to a frame holding only
        'EconomicAtivity_ID_CNAE_Sender', 'Municipality_ID_Sender' and
        'Product_Value'.
    """
    # Column names must be unique and must follow the layout documented
    # above: Receiver_Type/Sender_Type and Receiver_Situation/Sender_Situation
    # are distinct columns.
    orig_cols = [
        'TransactedProduct_ID_NCM',
        'TransactedProduct_ID_HS',
        'EconomicAtivity_ID_CNAE_Receiver',
        'EconomicAtivity_ID_CNAE_Sender',
        'CFOP_ID',
        'CFOP_Reclassification',
        'CFOP_Flow',
        'Receiver_Type',
        'Sender_Type',
        'Municipality_ID_Receiver',
        'Municipality_ID_Sender',
        'Year',
        'Monthly',
        'Receiver_Situation',
        'Sender_Situation',
        'Cost_Value',
        'ICMS_ST_Value',
        'ICMS_ST_RET_Value',
        'ICMS_Value',
        'IPI_Value',
        'PIS_Value',
        'COFINS_Value',
        'II_Value',
        'Product_Value',
        'ISSQN_Value',
        'Origin',
    ]

    # The two identifier columns are read as strings; Product_Value goes
    # through the project's floatvert converter.
    converters = {
        "EconomicAtivity_ID_CNAE_Sender": str,
        "Municipality_ID_Sender": str,
        "Product_Value": floatvert,
    }

    dados = read_from_csv(
        entrada,
        delimiter=delimiter,
        cols=orig_cols,
        converters=converters,
        header=1,
    )
    dados = dados[[
        'EconomicAtivity_ID_CNAE_Sender',
        'Municipality_ID_Sender',
        'Product_Value',
    ]]

    # NOTE: a disabled normalisation step used to sit here (CNAE values '1'
    # and '2' rewritten to '0', then zero-padding to two characters). It is
    # not applied.

    # Presumably truncates the sender CNAE to its leftmost 2 characters --
    # confirm against left_df's definition.
    dados = left_df(dados, 'EconomicAtivity_ID_CNAE_Sender', 2)
    return dados
def transform(year):
    """Load one year's export extract and run it through the HS lookups.

    Reads ``dados/exportacao/sent/<year>_extract.csv``, passes the frame
    through ``left_df(df, 'HS', 4)`` and then, depending on ``year``, applies
    lookups built from the files under ``docs/classificacao/HS/anos/``:

    * 2000-2001: ``1996x2002.csv`` followed by ``2007.csv``
    * 2002-2006: ``2002x2007.csv`` followed by ``2007.csv``
    * 2007-2014: ``2012x2007.csv``

    For those years the resulting frame is printed. Years outside 2000-2014
    get no lookup step and nothing is printed.

    Args:
        year: year of the extract; used to build the file name and to choose
            the lookup branch.

    Returns:
        The frame after the branch for ``year`` has run (the frame as loaded
        and passed through ``left_df`` when no branch matches).

    NOTE(review): every lookup below is handed to ``DataFrame.apply`` with the
    default axis, so each lambda receives a whole column rather than a single
    HS code, and ``str(x)`` is the text of that column. This looks like it
    was meant to be an element-wise mapping of the 'HS' column -- confirm the
    intent before relying on the output.
    """
    cols = ['ANO', 'MES', 'HS', 'PAIS', 'UF', 'PORTO', 'MUNICIPIO',
            'UNIDADE', 'QUANTIDADE', 'KGLIQUIDO', 'VALORFOB']
    source_file = 'dados/exportacao/sent/' + str(year) + '_extract.csv'
    df = read_from_csv(source_file, 1, ",", cols)
    df = left_df(df, 'HS', 4)

    if year == 2000 or year == 2001:
        # 1996 x 2002 table.
        rd = read_from_csv('docs/classificacao/HS/anos/1996x2002.csv', 2, ';',
                           ['HS96', 'HS02'],
                           converters={"HS96": str, "HS02": str})
        df = df.apply(lambda x: rd['HS02'][rd.HS96 == str(x)])

        # Convert to 2007. rd2 is loaded with a single 'HS2007' column, so
        # that is the column the comparison has to use.
        # NOTE(review): this branch reads 2007.csv with header argument 2 and
        # '|' as delimiter, the 2002-2006 branch with 1 and ';' -- one of the
        # two is probably wrong.
        rd2 = read_from_csv('docs/classificacao/HS/anos/2007.csv', 2, '|',
                            ['HS2007'], converters={"HS2007": str})
        # NOTE(review): this yields a list wrapping a boolean mask, not a
        # column of codes -- the step looks unfinished.
        df = df.apply(lambda x: [rd2.HS2007 == str(x)])
    elif 2001 < year <= 2006:
        # NOTE(review): selects HS02 where HS07 matches; confirm that this is
        # the intended lookup direction.
        rd = read_from_csv('docs/classificacao/HS/anos/2002x2007.csv', 1, ';',
                           ['HS02', 'HS07'], converters={"HS07": str})
        df = df.apply(lambda x: rd['HS02'][rd.HS07 == str(x)])

        rd2 = read_from_csv('docs/classificacao/HS/anos/2007.csv', 1, ';',
                            ['HS2007'], converters={"HS2007": str})
        # NOTE(review): same unfinished mask-in-a-list step as above.
        df = df.apply(lambda x: [rd2.HS2007 == str(x)])
    elif 2007 <= year <= 2014:
        # 2007-2011 and 2012-2014 use the same table and the same lookup.
        # apply() is not in-place, so its result has to be assigned.
        rd = read_from_csv('docs/classificacao/HS/anos/2012x2007.csv', 1, ';',
                           ['HS2012', 'HS2007'], converters={"HS2007": str})
        df = df.apply(lambda x: rd['HS2012'][rd.HS2007 == str(x)])
    else:
        # No lookup is defined for this year; nothing is printed.
        return df

    # Parenthesised single-argument form prints the same under the Python 2
    # print statement and the Python 3 print function.
    print(df)
    return df