def test_Municipality(self, year, size, column):
    """Check per-location export totals in the database against the MDIC CSV.

    Sums ``export_val`` from ``secex_ymb`` (joined to ``attrs_bra``) per
    ``id_mdic`` for the requested year, then compares each total with the sum
    of the CSV column at index 9 for the matching group of the CSV column at
    index ``column``. Every missing group and every differing total is printed
    and counted; the test passes only when that count is zero.

    Args:
        year: Year used in the SQL filter and in the CSV file name.
        size: Value matched against ``bra_id_len`` in the SQL filter.
        column: Positional index of the CSV column to group by.
    """
    print("Entering in checkBRA")
    dfDV = sql_to_df(
        "SELECT a.id_mdic as id,sum(export_val) as val,sum(import_val) as valimport FROM secex_ymb s,attrs_bra a where s.bra_id_len="
        + str(size)
        + " and a.id=s.bra_id and s.month=0 and s.year="
        + str(year)
        + " group by 1;",
        db,
    )
    dfSent = read_from_csv(
        "dados\\exportacao\\sent\\MDIC_" + str(year) + ".csv", delimiter="|"
    )
    dfGroup = dfSent.groupby(dfSent.columns[column])

    total = 0
    for mdic in dfDV['id']:
        mdicid = to_int(mdic)
        if not mdicid:
            # Ids without a usable integer form are skipped
            # (presumably to_int returns a falsy value for them -- confirm).
            continue

        valDV = dfDV[dfDV['id'] == mdicid]['val'].values[0]
        if not valDV or str(valDV) == 'nan':
            # No database value to compare. Skipping here avoids comparing
            # against a CSV sum left over from a previous iteration.
            continue

        try:
            valCSV = dfGroup.get_group(mdicid)[dfSent.columns[9]].sum()
        except KeyError:
            # get_group raises KeyError when the id is absent from the CSV;
            # any other exception is a real error and must surface.
            total += 1
            print(
                "Not found in CSV a value for " + str(mdic)
                + " - (original bra " + str(mdic) + ") Exports of value "
                + str(valDV) + " in the year " + str(year)
            )
            continue

        valCSV = to_int(valCSV)
        valDV = to_int(valDV)
        if valCSV and valDV and valDV != valCSV:
            total += 1
            print(
                "ERROR in BRA (" + str(year) + "): " + str(mdic) + " / "
                + str(mdicid) + " - Value in CSV " + str(valCSV)
                + " <> Value in DV " + str(valDV)
                + " - Difference: " + str(valCSV - valDV)
            )

    self.assertEqual(total, 0)
def test_HS(self, year):
    """Check per-product export totals in the database against the MDIC CSV.

    Sums ``export_val`` from ``secex_ymp`` per ``hs_id`` (rows with
    ``hs_id_len=6``) for the requested year. Each id is zero-padded to six
    characters and characters 2..5 are converted with ``to_int`` to form the
    key looked up in the CSV, which is grouped by its column at index 10. The
    sum of the CSV column at index 9 for that group is compared with the
    database total. Every missing group and every differing total is printed
    and counted; the test passes only when that count is zero.

    Args:
        year: Year used in the SQL filter and in the CSV file name.
    """
    print("Entering in checkHS")
    dfDV = sql_to_df(
        "SELECT s.hs_id as id,sum(export_val) as val,sum(import_val) as valimport FROM secex_ymp s where s.hs_id_len=6 and s.month=0 and s.year="
        + str(year)
        + " group by 1;",
        db,
    )
    dfSent = read_from_csv(
        "dados\\exportacao\\sent\\MDIC_" + str(year) + ".csv", delimiter="|"
    )
    dfGroup = dfSent.groupby(dfSent.columns[10])

    total = 0
    for hs in dfDV['id']:
        hsid = str(hs).zfill(6)
        hsshort = hsid[2:6]
        hsint = to_int(hsshort)
        if not hsint:
            # Ids whose short form has no usable integer value are skipped
            # (presumably to_int returns a falsy value for them -- confirm).
            continue

        valDV = dfDV[dfDV['id'] == hsid]['val'].values[0]
        if not valDV or str(valDV) == 'nan':
            # No database value to compare. Skipping here avoids comparing
            # against a CSV sum left over from a previous iteration.
            continue

        try:
            valCSV = sum(dfGroup.get_group(hsint)[dfSent.columns[9]])
        except KeyError:
            # get_group raises KeyError when the key is absent from the CSV;
            # any other exception is a real error and must surface.
            total += 1
            # str(hs): the id is not guaranteed to be a string, and a bare
            # concatenation would raise TypeError while reporting.
            print(
                "Not found in CSV a value for " + str(hsint) + " / "
                + str(hsshort) + " (original hs " + str(hs)
                + ") - Exports of value " + str(valDV)
                + " in the year " + str(year)
            )
            continue

        valCSV = to_int(valCSV)
        valDV = to_int(valDV)
        if valCSV and valDV and valDV != valCSV:
            total += 1
            print(
                "ERROR in HS (" + str(year) + "): " + str(hsint) + " / "
                + str(hs) + " - Value in CSV " + str(valCSV)
                + " <> Value in DV " + str(valDV)
                + " - Difference: " + str(valCSV - valDV)
            )

    self.assertEqual(total, 0)
def test_main(self):
    """Check enrolment for ``bra_id='4mg'`` against the 2009 synopsis CSV.

    Loads the semicolon-delimited synopsis file, drops the two sub-location
    columns and rows with fewer than two non-null values, normalises the
    ``bra`` column through ``city_fix`` and converts ``value`` to float64.
    The database side is ``sum(enrolled)`` from ``hedu_ybucd`` for
    ``bra_id='4mg'``. Both frames are handed to ``run_check`` and its result
    is asserted to be zero.
    """
    # Example of an input row, as noted by the original author: "Brasil;;;2.314;"
    cols = ['bra', 'bra_sub1', 'bra_sub2', 'value'] + list(range(5, 40))

    dfSent = read_from_csv(
        "docs\\check\\educacaosuperior\\sinopse_da_educacao_superior_2009-1.1.csv",
        delimiter=";",
        cols=cols,
        usecols=cols,
    )
    dfSent = dfSent.drop(['bra_sub1', 'bra_sub2'], axis=1)
    dfSent = dfSent.dropna(thresh=2)

    mapStates = getMapStates()
    # Named fix_city rather than `format` so the builtin is not shadowed.
    fix_city = lambda x: city_fix(x, mapStates)
    dfSent['bra'] = dfSent["bra"].map(fix_city)
    dfSent['value'] = dfSent.apply(lambda f: to_number(f['value']), axis=1)
    dfSent['value'] = dfSent['value'].astype(np.float64)

    sql = "SELECT bra_id as id,sum(enrolled) as value FROM hedu_ybucd where bra_id='4mg' and bra_id_len=3 AND d_id in ('A','B') and course_id_len=6 group by 1"
    dfDV = sql_to_df(sql, db)
    dfDV['value'] = dfDV['value'].astype(np.float64)

    total = run_check(dfDV, dfSent, 'bra', 0, 'value')
    self.assertEqual(total, 0)
def test_Municipality(self, year, size):
    """Check wage totals per location between ``rais_yb`` and the RAIS CSV.

    The database side sums ``wage`` per ``left(a.id_ibge,6)`` for the given
    year and ``bra_id_len``; the CSV side is ``Rais<year>.csv``. Both frames
    go to ``run_check`` with the ``'munic'`` key, and its result is asserted
    to be zero.

    Args:
        year: Year used in the SQL filter and in the CSV file name.
        size: Value matched against ``bra_id_len`` in the SQL filter.
    """
    print("Entering in checkBRA")

    query = (
        "SELECT left(a.id_ibge,6) as id,sum(wage) as val FROM rais_yb s,attrs_bra a "
        "where s.bra_id_len=%s and a.id=s.bra_id and year=%s group by 1;"
        % (str(size), str(year))
    )
    db_frame = sql_to_df(query, db)

    # An alternative fixture, Rais<year>Teste.csv (comma-delimited), was
    # used here at some point according to the original commented-out line.
    csv_path = "dados\\emprego\\sent\\Rais%s.csv" % str(year)
    csv_frame = read_from_csv(csv_path, delimiter=";", cols=cols)

    mismatches = run_check(db_frame, csv_frame, 'munic', year)
    self.assertEqual(mismatches, 0)
def test_CNAE(self, year):
    """Check wage totals per CNAE code between ``rais_yi`` and the RAIS CSV.

    The database side sums ``wage`` per ``right(s.cnae_id,4)`` for rows with
    ``cnae_id_len=6`` in the given year; the CSV side is ``Rais<year>.csv``.
    Both frames go to ``run_check`` with the ``'cnae'`` key, and its result
    is asserted to be zero.

    Args:
        year: Year used in the SQL filter and in the CSV file name.
    """
    print("Entering in checkCNAE")

    # Original author's note: "before right 4 and len 5" (presumably the
    # previous settings of this query -- confirm).
    query = (
        "SELECT right(s.cnae_id,4) as id,sum(wage) as val FROM rais_yi s "
        "where s.cnae_id_len=6 and year=%s group by 1;" % str(year)
    )
    db_frame = sql_to_df(query, db)

    csv_path = "dados\\emprego\\sent\\Rais%s.csv" % str(year)
    csv_frame = read_from_csv(csv_path, delimiter=";", cols=cols)

    mismatches = run_check(db_frame, csv_frame, 'cnae', year)
    self.assertEqual(mismatches, 0)
def test_CBO(self, year):
    """Check wage totals per CBO code between ``rais_yo`` and the RAIS CSV.

    The database side sums ``wage`` per ``cbo_id`` for rows with
    ``cbo_id_len=4`` in the given year; the CSV side is ``Rais<year>.csv``.
    Both frames go to ``run_check`` with the ``'cbo'`` key, and its result
    is asserted to be zero.

    Args:
        year: Year used in the SQL filter and in the CSV file name.
    """
    print("Entering in checkCBO")

    query = (
        "SELECT s.cbo_id as id,sum(wage) as val FROM rais_yo s "
        "where s.cbo_id_len=4 and s.year=%s group by 1;" % str(year)
    )
    db_frame = sql_to_df(query, db)

    csv_path = "dados\\emprego\\sent\\Rais%s.csv" % str(year)
    csv_frame = read_from_csv(csv_path, delimiter=";", cols=cols)

    mismatches = run_check(db_frame, csv_frame, 'cbo', year)
    self.assertEqual(mismatches, 0)