def main(fname, blpath, odir, year, month):
    """Apply EI rules 1 and 2 to one year/month of data and build the "srp" table.

    The prepared frames are cached in an HDF5 store inside ``odir`` under the
    keys ``'ei_df'`` and ``'bl_df'``. When that store already exists it is read
    back instead of re-parsing ``fname``.

    Args:
        fname: Path of the semicolon-separated EI CSV file.
        blpath: Path handed to ``bl_prepare`` together with the raw frame.
        odir: Output directory; also the location of the HDF5 cache.
        year: Year used in the cache/output names and forwarded to
            ``make_table``.
        month: Month used in the cache/output names and forwarded to
            ``make_table``.
    """
    print("Applying EI Rules 1 and 2.")
    hdf_filepath = os.path.join(odir, "%s_%s_store_df.h5" % (year, month))

    # The doubled space before the path keeps the emitted log line unchanged.
    print("LOOKING for HDF file at location  %s" % hdf_filepath)

    if os.path.exists(hdf_filepath):
        print("READING HDF")
        ei_df = pd.read_hdf(hdf_filepath, 'ei_df')
        bl_df = pd.read_hdf(hdf_filepath, 'bl_df')
    else:
        # NOTE(review): `converters` and `cols` are not defined inside this
        # function; presumably they are module-level names -- confirm.
        ei_df = pd.read_csv(fname, header=0, sep=";", converters=converters,
                            names=cols, quotechar="'", decimal=",")
        ei_df, bl_df = bl_prepare(ei_df, blpath)
        print("Doing setup...")
        ei_df, bl_df = setup(ei_df, bl_df)
        # Cache the frames after setup so later runs can skip the work above.
        print("SAVING HDF to %s" % hdf_filepath)
        ei_df.to_hdf(hdf_filepath, 'ei_df')
        bl_df.to_hdf(hdf_filepath, 'bl_df')

    print("Entering rule 1...")
    ei_df = rule1(ei_df, bl_df, RECEIVER)
    ei_df = rule1(ei_df, bl_df, SENDER)

    print("Entering rule 2...")
    ei_df = rule2(ei_df)
    print(ei_df)

    # "remit_value" is listed exactly once: a repeated name would select the
    # same column twice when the table is built.
    output_values = [
        "purchase_value",
        "remit_value",
        "transfer_value",
        "devolution_value",
        "icms_credit_value",
        "tax",
        "icms_tax",
        "transportation_cost",
        "year",
        "month",
    ]
    output_name = "%s_%s" % (year, month)

    print("Making tables...")
    make_table(ei_df, "srp", output_values, odir, output_name,
               year=year, month=month)
def main(fname, odir):
    """Build the "ymsr", "yms" and "ymr" tables from a raw EI CSV export.

    ``_check_hdf_cache`` is consulted first. When it returns no frame, the CSV
    is parsed, the tax totals and per-CFOP value columns are derived, and the
    result is written to the cache target before the tables are produced.

    Args:
        fname: Path of the semicolon-separated EI CSV file. Its base name,
            with ".csv" removed, names the output.
        odir: Output directory passed to ``_check_hdf_cache`` and
            ``make_table``.
    """
    print("Reading data frame...")
    column_names = [
        "ncm", "hs_id",
        "EconomicAtivity_ID_CNAE_Receiver_5d", "cnae_id_r",
        "EconomicAtivity_ID_CNAE_Sender_5d", "cnae_id_s",
        "CFOP_ID", "Receiver_foreign", "Sender_foreign",
        "bra_id_r", "bra_id_s", "year", "month",
        "transportation_cost", "ICMS_ST_Value", "ICMS_Value", "IPI_Value",
        "PIS_Value", "COFINS_Value", "II_Value", "product_value",
        "ISSQN_Value",
    ]
    field_parsers = {
        "hs_id": update_hs_id,
        "bra_id_s": lookup_location,
        "bra_id_r": lookup_location,
        "cnae_id_r": lookup_cnae,
        "cnae_id_s": lookup_cnae,
    }

    ei_df, target = _check_hdf_cache(fname, odir)
    if ei_df is None:
        ei_df = pd.read_csv(fname, header=0, sep=";",
                            converters=field_parsers, names=column_names,
                            quotechar="'", decimal=",")

        print("Processing...")
        ei_df['icms_tax'] = ei_df.ICMS_ST_Value + ei_df.ICMS_Value
        ei_df['tax'] = (ei_df.icms_tax + ei_df.IPI_Value + ei_df.PIS_Value
                        + ei_df.COFINS_Value + ei_df.II_Value
                        + ei_df.ISSQN_Value)

        # Each value column holds product_value on rows of its CFOP class and
        # zero on every other row.
        value_columns = (
            ("purchase_value", PURCHASES),
            ("transfer_value", TRANSFERS),
            ("devolution_value", DEVOLUTIONS),
            ("icms_credit_value", CREDITS),
            ("remit_value", REMITS),
        )
        for column, _ in value_columns:
            ei_df[column] = 0
        for column, cfop in value_columns:
            ei_df.loc[ei_df.CFOP_ID == cfop, column] = ei_df.product_value

        ei_df.to_hdf(target, HDF_CACHE, append=False)

    print("Aggregating...")
    output_values = [
        "purchase_value", "transfer_value", "devolution_value",
        "icms_credit_value", "remit_value", "tax", "icms_tax",
        "transportation_cost",
    ]
    output_name = ntpath.basename(fname).replace(".csv", "")

    print("Making tables...")
    for table in ("ymsr", "yms", "ymr"):
        make_table(ei_df, table, output_values, odir, output_name)
def main(fname, odir):
    """Turn a raw EI CSV file into the "ymsr", "yms" and "ymr" tables.

    A frame returned by ``_check_hdf_cache`` is used as-is. Otherwise the CSV
    is read, ``icms_tax``/``tax`` and the five CFOP-specific value columns are
    added, and the frame is stored at the cache target under ``HDF_CACHE``.

    Args:
        fname: Path of the semicolon-separated EI CSV file; its base name
            without ".csv" becomes the output name.
        odir: Output directory given to ``_check_hdf_cache`` and
            ``make_table``.
    """
    print("Reading data frame...")

    # CFOP class -> column that receives product_value for rows of that class.
    cfop_targets = [
        (PURCHASES, "purchase_value"),
        (TRANSFERS, "transfer_value"),
        (DEVOLUTIONS, "devolution_value"),
        (CREDITS, "icms_credit_value"),
        (REMITS, "remit_value"),
    ]
    cols = [
        "ncm", "hs_id", "EconomicAtivity_ID_CNAE_Receiver_5d", "cnae_id_r",
        "EconomicAtivity_ID_CNAE_Sender_5d", "cnae_id_s", "CFOP_ID",
        "Receiver_foreign", "Sender_foreign", "bra_id_r", "bra_id_s", "year",
        "month", "transportation_cost", "ICMS_ST_Value", "ICMS_Value",
        "IPI_Value", "PIS_Value", "COFINS_Value", "II_Value", "product_value",
        "ISSQN_Value",
    ]
    converters = dict([
        ("hs_id", update_hs_id),
        ("bra_id_s", lookup_location),
        ("bra_id_r", lookup_location),
        ("cnae_id_r", lookup_cnae),
        ("cnae_id_s", lookup_cnae),
    ])

    ei_df, target = _check_hdf_cache(fname, odir)
    cache_miss = ei_df is None
    if cache_miss:
        ei_df = pd.read_csv(fname, header=0, sep=";", converters=converters,
                            names=cols, quotechar="'", decimal=",")
        print("Processing...")

        icms_total = ei_df.ICMS_ST_Value + ei_df.ICMS_Value
        ei_df['icms_tax'] = icms_total
        ei_df['tax'] = (ei_df.icms_tax + ei_df.IPI_Value + ei_df.PIS_Value
                        + ei_df.COFINS_Value + ei_df.II_Value
                        + ei_df.ISSQN_Value)

        # Create every value column as zero first, then fill the matching rows.
        for _, value_col in cfop_targets:
            ei_df[value_col] = 0
        for cfop_class, value_col in cfop_targets:
            matches = ei_df.CFOP_ID == cfop_class
            ei_df.loc[matches, value_col] = ei_df.product_value

        ei_df.to_hdf(target, HDF_CACHE, append=False)

    print("Aggregating...")
    output_values = [value_col for _, value_col in cfop_targets]
    output_values += ["tax", "icms_tax", "transportation_cost"]
    output_name = ntpath.basename(fname).replace(".csv", "")

    print("Making tables...")
    make_table(ei_df, "ymsr", output_values, odir, output_name)
    make_table(ei_df, "yms", output_values, odir, output_name)
    make_table(ei_df, "ymr", output_values, odir, output_name)
def main(fname, blpath, odir):
    """Blacklist-filter a raw EI CSV export and build its aggregate tables.

    Steps:
      1. Read the EI CSV and the blacklist CSV.
      2. Left-merge the blacklist on the sender (bra_id, cnae_id) pair, then
         on the receiver pair. Rows whose merged ``d_bl`` equals 1 get the
         corresponding CNAE id replaced by ``CNAE_BLACKLISTED``.
      3. Rows blacklisted on both sides get ``hs_id`` set to ``HS_BLACKLIST``.
      4. Derive ``icms_tax``, ``tax`` and the per-CFOP value columns.
      5. Call ``make_table`` for each of the seven table names.

    Args:
        fname: Path of the semicolon-separated EI CSV file; its base name
            without ".csv" becomes the output name.
        blpath: Path of the semicolon-separated blacklist CSV file.
        odir: Output directory passed to ``make_table``.
    """
    print("Reading data frame HELLO...")
    cols = [
        "ncm", "hs_id", "EconomicAtivity_ID_CNAE_Receiver_5d", "cnae_id_r",
        "EconomicAtivity_ID_CNAE_Sender_5d", "cnae_id_s", "CFOP_ID",
        "Receiver_foreign", "Sender_foreign", "bra_id_r", "bra_id_s", "year",
        "month", "transportation_cost", "ICMS_ST_Value", "ICMS_Value",
        "IPI_Value", "PIS_Value", "COFINS_Value", "II_Value", "product_value",
        "ISSQN_Value",
    ]
    converters = {
        "hs_id": update_hs_id,
        "bra_id_s": lookup_location,
        "bra_id_r": lookup_location,
        "cnae_id_r": lookup_cnae,
        "cnae_id_s": lookup_cnae,
    }
    ei_df = pd.read_csv(fname, header=0, sep=";", converters=converters,
                        names=cols, quotechar="'", decimal=",")

    # -- Do blacklist filtering
    bl_cols = ["bra_id", "cnae_id", "num_est", "d_bl"]
    bl_converters = {"bra_id": lookup_location, "cnae_id": lookup_cnae}
    bl_df = pd.read_csv(blpath, header=0, sep=";", converters=bl_converters,
                        names=bl_cols, quotechar="'", decimal=",")

    # Sender side. Writes go through .loc: chained indexing such as
    # ``df.col[mask] = value`` may assign into a temporary copy.
    ei_df = pd.merge(ei_df, bl_df, how='left',
                     left_on=['bra_id_s', 'cnae_id_s'],
                     right_on=['bra_id', 'cnae_id'])
    sender_hit = ei_df.d_bl == 1
    ei_df.loc[sender_hit, "cnae_id_s"] = CNAE_BLACKLISTED
    print("Blacklisting %s sending transactions" % (
        ei_df.loc[sender_hit, "cnae_id_s"].count()))
    # Remove the blacklist columns so the receiver merge can add them again.
    ei_df = ei_df.drop(labels=bl_cols, axis=1)

    # Receiver side. The blacklist columns from this merge stay on the frame.
    ei_df = pd.merge(ei_df, bl_df, how='left',
                     left_on=['bra_id_r', 'cnae_id_r'],
                     right_on=['bra_id', 'cnae_id'])
    receiver_hit = ei_df.d_bl == 1
    print("Blacklisting %s receiving transactions" % (
        ei_df.loc[receiver_hit, "cnae_id_r"].count()))
    ei_df.loc[receiver_hit, "cnae_id_r"] = CNAE_BLACKLISTED

    # -- HS blacklist: product hidden when both parties are blacklisted.
    both_hit = ((ei_df.cnae_id_r == CNAE_BLACKLISTED)
                & (ei_df.cnae_id_s == CNAE_BLACKLISTED))
    ei_df.loc[both_hit, "hs_id"] = HS_BLACKLIST
    print("Blacklisting %s products" % (
        ei_df.hs_id[ei_df.hs_id == HS_BLACKLIST].count()))

    print("Processing...")
    ei_df['icms_tax'] = ei_df.ICMS_ST_Value + ei_df.ICMS_Value
    ei_df['tax'] = (ei_df.icms_tax + ei_df.IPI_Value + ei_df.PIS_Value
                    + ei_df.COFINS_Value + ei_df.II_Value + ei_df.ISSQN_Value)

    # Vectorised: each column is zero except on rows of its CFOP class, where
    # it takes product_value. This avoids five row-by-row apply() passes.
    cfop_columns = [
        ("purchase_value", PURCHASES),
        ("transfer_value", TRANSFERS),
        ("devolution_value", DEVOLUTIONS),
        ("icms_credit_value", CREDITS),
        ("remit_value", REMITS),
    ]
    for column, cfop in cfop_columns:
        ei_df[column] = 0
        ei_df.loc[ei_df.CFOP_ID == cfop, column] = ei_df.product_value

    print("Aggregating...")
    output_values = [
        "purchase_value", "transfer_value", "devolution_value",
        "icms_credit_value", "remit_value", "tax", "icms_tax",
        "transportation_cost",
    ]
    output_name = ntpath.basename(fname).replace(".csv", "")

    print("Making tables...")
    for table in ("ymsrp", "ymsr", "ymsp", "ymrp", "yms", "ymr", "ymp"):
        make_table(ei_df, table, output_values, odir, output_name)
def _mark_blacklisted(ei_df, bl_df, bra_col, cnae_col):
    """Left-merge the blacklist on one side's (bra, cnae) pair.

    Returns the merged frame and a boolean mask of the rows whose merged
    ``d_bl`` equals 1.
    """
    merged = pd.merge(ei_df, bl_df, how='left', left_on=[bra_col, cnae_col],
                      right_on=['bra_id', 'cnae_id'])
    return merged, merged.d_bl == 1


def main(fname, blpath, odir):
    """Blacklist-filter a raw EI CSV export and build its aggregate tables.

    The EI frame is merged with the blacklist once for senders and once for
    receivers. A row whose merged ``d_bl`` equals 1 has that side's CNAE id
    replaced by ``CNAE_BLACKLISTED``, and a row blacklisted on both sides
    also has ``hs_id`` replaced by ``HS_BLACKLIST``. The tax totals and
    per-CFOP value columns are then derived and ``make_table`` is called for
    each of the seven table names.

    Args:
        fname: Path of the semicolon-separated EI CSV file; its base name
            without ".csv" becomes the output name.
        blpath: Path of the semicolon-separated blacklist CSV file.
        odir: Output directory passed to ``make_table``.
    """
    print("Reading data frame HELLO...")
    cols = ["ncm", "hs_id",
            "EconomicAtivity_ID_CNAE_Receiver_5d", "cnae_id_r",
            "EconomicAtivity_ID_CNAE_Sender_5d", "cnae_id_s",
            "CFOP_ID", "Receiver_foreign", "Sender_foreign",
            "bra_id_r", "bra_id_s", "year", "month",
            "transportation_cost", "ICMS_ST_Value", "ICMS_Value",
            "IPI_Value", "PIS_Value", "COFINS_Value", "II_Value",
            "product_value", "ISSQN_Value"]
    converters = {"hs_id": update_hs_id,
                  "bra_id_s": lookup_location,
                  "bra_id_r": lookup_location,
                  "cnae_id_r": lookup_cnae,
                  "cnae_id_s": lookup_cnae}
    ei_df = pd.read_csv(fname, header=0, sep=";", converters=converters,
                        names=cols, quotechar="'", decimal=",")

    # -- Do blacklist filtering
    bl_cols = ["bra_id", "cnae_id", "num_est", "d_bl"]
    bl_converters = {"bra_id": lookup_location, "cnae_id": lookup_cnae}
    bl_df = pd.read_csv(blpath, header=0, sep=";", converters=bl_converters,
                        names=bl_cols, quotechar="'", decimal=",")

    # Sender side. Assign through .loc: a chained ``df.col[mask] = value``
    # may write to a temporary copy instead of the frame.
    ei_df, flagged = _mark_blacklisted(ei_df, bl_df, 'bra_id_s', 'cnae_id_s')
    ei_df.loc[flagged, "cnae_id_s"] = CNAE_BLACKLISTED
    print("Blacklisting %s sending transactions" % (
        ei_df.loc[flagged, "cnae_id_s"].count()))
    # Drop the blacklist columns so the receiver merge can add them again.
    ei_df = ei_df.drop(labels=bl_cols, axis=1)

    # Receiver side. The blacklist columns from this merge stay on the frame.
    ei_df, flagged = _mark_blacklisted(ei_df, bl_df, 'bra_id_r', 'cnae_id_r')
    print("Blacklisting %s receiving transactions" % (
        ei_df.loc[flagged, "cnae_id_r"].count()))
    ei_df.loc[flagged, "cnae_id_r"] = CNAE_BLACKLISTED

    # -- HS blacklist
    hide_product = ((ei_df.cnae_id_r == CNAE_BLACKLISTED)
                    & (ei_df.cnae_id_s == CNAE_BLACKLISTED))
    ei_df.loc[hide_product, "hs_id"] = HS_BLACKLIST
    print("Blacklisting %s products" % (
        ei_df.hs_id[ei_df.hs_id == HS_BLACKLIST].count()))

    print("Processing...")
    ei_df['icms_tax'] = ei_df.ICMS_ST_Value + ei_df.ICMS_Value
    ei_df['tax'] = (ei_df.icms_tax + ei_df.IPI_Value + ei_df.PIS_Value
                    + ei_df.COFINS_Value + ei_df.II_Value + ei_df.ISSQN_Value)

    # One masked column assignment per CFOP class replaces a row-by-row
    # apply(): zero everywhere, product_value on rows of that class.
    for cfop, column in [(PURCHASES, "purchase_value"),
                         (TRANSFERS, "transfer_value"),
                         (DEVOLUTIONS, "devolution_value"),
                         (CREDITS, "icms_credit_value"),
                         (REMITS, "remit_value")]:
        ei_df[column] = 0
        ei_df.loc[ei_df.CFOP_ID == cfop, column] = ei_df.product_value

    print("Aggregating...")
    output_values = ["purchase_value", "transfer_value", "devolution_value",
                     "icms_credit_value", "remit_value", "tax", "icms_tax",
                     "transportation_cost"]
    output_name = ntpath.basename(fname).replace(".csv", "")

    print("Making tables...")
    for table in ["ymsrp", "ymsr", "ymsp", "ymrp", "yms", "ymr", "ymp"]:
        make_table(ei_df, table, output_values, odir, output_name)