def write(): st.title("Validation rules") st.write("Add validation rule") options = get_indicators() indicator_selected = st.selectbox("Select Indicator", options=options) realid = indicator_selected[0] currentNaNmethoddf = db.selectfromwhere("realid,nanmethod","parameter", "realid = ?",(realid,)) currentNaNMethod = currentNaNmethoddf['nanmethod'].iloc[0] st.write("Current way to deal with empty values: " + nanMethods[currentNaNMethod]) #if st.button("Update NaN-method"): NaNoptions = ["None selected","Convert to 0", "Convert to no", "Set empty values to inactive"] nanMethod = st.selectbox("Update NaN-method",options = NaNoptions) if st.button("Update method"): if nanMethod == "None selected": db.updatevalues("parameter", "nanmethod = ?", "realid = ?",(0,realid)) elif nanMethod == "Convert to 0": db.updatevalues("parameter", "nanmethod = ?", "realid = ?",(1,realid)) elif nanMethod == "Convert to no": db.updatevalues("parameter", "nanmethod = ?", "realid = ?",(2,realid)) else: db.updatevalues("parameter", "nanmethod = ?", "realid = ?",(3,realid)) indicators = db.selectallfrom("indicator") parameters = db.selectallfrom("parameter") parameters = parameters[parameters["fk_methodid"] == currentMethod] questions = db.selectallfrom("question") valrules = db.selectallfrom("validationrule") valrules = valrules[valrules["fk_methodid"] == currentMethod] parid = parameters[parameters["realid"] == realid]['parameterid'].iloc[0] #ind = questions[questions['realid'] == realid] #parid = questions[questions['fk_parameterid'] == par].iloc[0] #print(parid) temprules = db.selectfromwhere("fk_parameterid,fk_validationruleid","validationruleparameter_map", "fk_parameterid = ?",(int(parid),)) valruleids = temprules['fk_validationruleid'].tolist() validationrules = valrules[valrules['validationruleid'].isin(valruleids)] st.write("Other validation rules: ") rows = selectable_data_table(validationrules[['rule','description']]) if st.button("Delete selected"): for i in rows: db.deletevalue("validationruleparameter_map","fk_parameterid = ? and fk_validationruleid = ?", (int(parid),int(validationrules['validationruleid'].iloc[i]))) newRule = st.text_input("New rule") description = st.text_input("Description") if st.button("Save rule"): db.insertvaluessingle("validationrule(rule,description,fk_methodid)","(?,?,?)",(newRule,description,int(currentMethod))) justAddedrule = db.selectfromwhere("rule,validationruleid","validationrule","rule = ?",(newRule,)) ruleid = justAddedrule['validationruleid'].iloc[0] db.insertvaluessingle("validationruleparameter_map(fk_parameterid,fk_validationruleid)","(?,?)",(int(parid),int(ruleid)))
def applynanMethod(nanmethod, df): if nanmethod == 0: return elif nanmethod == 1: for i in range(len(df)): dataid = df['dataid'].iloc[i] db.updatevalues("data", "isActive = ?", "dataid = ?", (int(0), int(dataid))) versiondf = db.selectfromwhere("dataid,version", "data", "dataid = ?", (int(dataid), )) currentversion = versiondf['version'].iloc[0] tempsplit = currentversion.split(".") fineversion = int(tempsplit[1]) + 1 newVersion = tempsplit[0] + "." + str(fineversion) newValue = "0" year = df['year'].iloc[i] parid = df['fk_parameterid'].iloc[i] projectid = df['fk_projectid'].iloc[i] orgid = df['fk_organisationid'].iloc[i] db.insertvaluessingle( "data(value,year,version,isActive,fk_parameterid,fk_projectid,fk_organisationid)", "(?,?,?,?,?,?,?)", (newValue, year, newVersion, int(1), int(parid), int(projectid), int(orgid))) elif nanmethod == 2: for i in range(len(df)): dataid = df['dataid'].iloc[i] db.updatevalues("data", "isActive = ?", "dataid = ?", (int(0), int(dataid))) versiondf = db.selectfromwhere("dataid,version", "data", "dataid = ?", (int(dataid), )) currentversion = versiondf['version'].iloc[0] tempsplit = currentversion.split(".") fineversion = int(tempsplit[1]) + 1 newVersion = tempsplit[0] + "." + str(fineversion) newValue = "no" year = df['year'].iloc[i] parid = df['fk_parameterid'].iloc[i] projectid = df['fk_projectid'].iloc[i] orgid = df['fk_organisationid'].iloc[i] db.insertvaluessingle( "data(value,year,version,isActive,fk_parameterid,fk_projectid,fk_organisationid)", "(?,?,?,?,?,?,?)", (newValue, year, newVersion, int(1), int(parid), int(projectid), int(orgid))) elif nanmethod == 3: for i in range(len(df)): dataid = df['dataid'].iloc[i] db.updatevalues("data", "isActive = ?", "dataid = ?", (int(0), int(dataid))) else: return
def validateparameter(parameterid,alterations,columns): data = db.selectallfrom("data") datadf = data[data['isActive'] == 1] pardf = datadf[datadf['fk_parameterid'] == parameterid] valrules = db.selectallfrom("validationrule") temprules = db.selectfromwhere("fk_parameterid,fk_validationruleid","validationruleparameter_map", "fk_parameterid = ?",(int(parameterid),)) valruleids = temprules['fk_validationruleid'].tolist() validationrules = valrules[valrules['validationruleid'].isin(valruleids)] for i in range(len(validationrules)): temprule = validationrules['rule'].iloc[i] for j in range(len(pardf)): tempRow = pardf.iloc[j] value = tempRow['value'] dataid = tempRow['dataid'] orgid = tempRow['fk_organisationid'] tempdf = datadf[datadf['fk_organisationid'] == orgid] result = valparse.check(temprule,value,tempdf) if result == False: row = pd.DataFrame([[int(dataid),temprule,value,parameterid]],columns = columns) alterations = alterations.append(row) return alterations
def write(): st.title("Validation") columns = ["dataid","rule","value","parameterid"] # Get all the alterations that are going to be done alterations = pd.DataFrame(columns = columns) parameters = get_indicators() if st.button("Validate!"): for i in range(len(parameters)): realid = parameters[i][0] parameteriddf = db.selectfromwhere("realid,fk_methodid,parameterid","parameter","realid = ? and fk_methodid = ?",(realid,int(currentMethod),)) parameterid = parameteriddf['parameterid'].iloc[0] data = db.selectallfrom("data") datadf = data[data['fk_parameterid'] == parameterid] datadf = datadf[datadf['isActive'] == 1] nanmethoddf = db.selectfromwhere("parameterid,nanmethod","parameter","parameterid = ?",(int(parameterid),)) nanmethod = nanmethoddf['nanmethod'].iloc[0] nandf = datadf[datadf['value'] == "nan"] print("Nandf: " + str(len(nandf))) if len(nandf) > 0: if nanmethod == 0: st.write("There are several NaN values in the data for this indicator, but there has not been a method selected yet how to handle these. Please do so under the validation rules section and come back to this page!") valparse.applynanMethod(nanmethod,nandf) alterations = validateparameter(parameterid,alterations,columns) if len(alterations) > 0: for j in range(len(alterations)): val = alterations['value'].iloc[j] dataid = alterations['dataid'].iloc[j] parid = alterations['parameterid'].iloc[j] rule = alterations['rule'].iloc[j] description = "Rule: " + rule db.insertvaluessingle("alteration(old_val,new_val,description,fk_dataid,fk_projectid)","(?,?,?,?,?)", (val,"?",description,int(dataid),int(currentProject))) else: st.write("All good! According to the validation rules, there are no wrong values.") alts = db.selectallfrom("alteration") alts = alts[alts['fk_projectid'] == currentProject] if len(alts) > 0: interactivetable(alts) else: st.write("Currently no flagged alterations!")
def getQvalue(realid, df): pardf = db.selectfromwhere("realid,fk_methodid,parameterid", "parameter", "realid = ? and fk_methodid = ?", ( realid, int(currentMethod), )) parid = pardf['parameterid'].iloc[0] data = df[df['fk_parameterid'] == parid] result = data['value'].iloc[0] return result
def getParameterid(realid): # Use db class to get parameterid parameterdf = db.selectfromwhere("realid,fk_methodid,parameterid", "parameter", "realid = ? and fk_methodid = ?", ( realid, int(currentMethod), )) result = parameterdf['parameterid'].iloc[0] return result
def parsePeriod(self, variable): currentyeardf = db.selectfromwhere("projectid,currentYear", "project", "projectid = ?", (int(currentProject), )) currentyear = currentyeardf['currentYear'].iloc[0] result = [] if '{' in variable: variable = variable[1:-1] tempSplit = variable.split(",") for i in range(len(tempSplit)): tempString = tempSplit[i] if "currentyear" in tempString: tempString = tempString.replace("currentyear", currentyear) result.append(eval(tempString)) else: if "currentyear" in variable: tempString = variable.replace("currentyear", currentyear) result.append(eval(tempString)) else: result.append(variable) return result
def constructDf(self, type, formula, label, population, period): rawData = db.selectallfrom("data") rawData = rawData[rawData["fk_projectid"] == currentProject] rawData = rawData[rawData['isActive'] == 1] if type == "barchart": dims = self.calculateDimensions(formula, population, period) years = self.parsePeriod(period) labels = self.parseVariable(label) pops = self.parseVariable(population) vars = self.parseVariable(formula) labelVardf = pd.DataFrame(data={'label': labels, 'var': vars}) colPeriod = [] colVariable = [] colLabel = [] colPopulation = [] colValue = [] for year in years: for pop in pops: print(pop) for var in vars: colPeriod.append(year) colPopulation.append(pop) colVariable.append(var) colLabel.append(labelVardf[labelVardf['var'] == var] ['label'].iloc[0]) popiddf = db.selectfromwhere( "name,fk_projectid,populationid", "population", "name = ? and fk_projectid = ?", ( pop, int(currentProject), )) popid = popiddf['populationid'].iloc[0] poporgmapdf = db.selectfromwhere( "fk_populationid,fk_organisationid", "populationorganisation_map", "fk_populationid = ?", (int(popid), )) popOrgList = [] for i in range(len(poporgmapdf)): tempOrg = poporgmapdf["fk_organisationid"].iloc[i] popOrgList.append(tempOrg) tempDF = rawData[rawData['year'] == str(year)] tempDF = tempDF[tempDF['fk_organisationid'].isin( popOrgList)] #st.write(tempDF) #### Calculate the value based on year, pop and var(formula) result = aggparse.interpret(var, tempDF) colValue.append(result) data = { 'Year': colPeriod, 'Variable': colVariable, 'Label': colLabel, 'Population': colPopulation, 'Value': colValue } resultdf = pd.DataFrame(data=data) st.write(resultdf) elif type == "categories": dims = self.calculateDimensions(formula, population, period) years = self.parsePeriod(period) labels = self.parseVariable(label) pops = self.parseVariable(population) vars = self.parseVariable(formula) labelVardf = pd.DataFrame(data={'label': labels, 'var': vars}) colPeriod = [] colVariable = [] colLabel = [] colPopulation = [] colValue = [] for year in years: for pop in pops: for var in vars: colPeriod.append(year) colPopulation.append(pop) colVariable.append(var) colLabel.append(labelVardf[labelVardf['var'] == var] ['label'].iloc[0]) popiddf = db.selectfromwhere( "name,fk_projectid,populationid", "population", "name = ? and fk_projectid = ?", ( pop, int(currentProject), )) popid = popiddf['populationid'].iloc[0] poporgmapdf = db.selectfromwhere( "fk_populationid,fk_organisationid", "populationorganisation_map", "fk_populationid = ?", (int(popid), )) popOrgList = [] for i in range(len(poporgmapdf)): tempOrg = poporgmapdf["fk_organisationid"].iloc[i] popOrgList.append(tempOrg) tempDF = rawData[rawData['year'] == str(year)] tempDF = tempDF[tempDF['fk_organisationid'].isin( popOrgList)] #st.write(tempDF) #### Calculate the value based on year, pop and var(formula) result = aggparse.interpret(var, tempDF) colValue.append(result) data = { 'Year': colPeriod, 'Variable': colVariable, 'Label': colLabel, 'Population': colPopulation, 'Value': colValue } resultdf = pd.DataFrame(data=data) st.write(resultdf) elif type == "table": dims = self.calculateDimensions(formula, population, period) years = self.parsePeriod(period) labels = self.parseVariable(label) pops = self.parseVariable(population) vars = self.parseVariable(formula) labelVardf = pd.DataFrame(data={'label': labels, 'var': vars}) colPeriod = [] colVariable = [] colLabel = [] colPopulation = [] colValue = [] for year in years: for pop in pops: for var in vars: colPeriod.append(year) colPopulation.append(pop) colVariable.append(var) colLabel.append(labelVardf[labelVardf['var'] == var] ['label'].iloc[0]) popiddf = db.selectfromwhere( "name,fk_projectid,populationid", "population", "name = ? and fk_projectid = ?", ( pop, int(currentProject), )) popid = popiddf['populationid'].iloc[0] poporgmapdf = db.selectfromwhere( "fk_populationid,fk_organisationid", "populationorganisation_map", "fk_populationid = ?", (int(popid), )) popOrgList = [] for i in range(len(poporgmapdf)): tempOrg = poporgmapdf["fk_organisationid"].iloc[i] popOrgList.append(tempOrg) tempDF = rawData[rawData['year'] == str(year)] tempDF = tempDF[tempDF['fk_organisationid'].isin( popOrgList)] #st.write(tempDF) #### Calculate the value based on year, pop and var(formula) result = aggparse.interpret(var, tempDF) colValue.append(result) data = { 'Year': colPeriod, 'Variable': colVariable, 'Label': colLabel, 'Population': colPopulation, 'Value': colValue } resultdf = pd.DataFrame(data=data) st.write(resultdf) else: return return resultdf