class TopsisEvaluator(BaseEvaluator):

    def __init__(self, candidate_names=None):
        self.candidate_names = candidate_names
        self.data_topsis = None

    def get_optimal_candidates(self, candidates, criteria):
        names = self.candidate_names.iloc[candidates.index].values
        min_max = []
        weights = []
        columns = []
        for c in criteria:
            if c.maximize:
                min_max.append(MAX)
            else:
                min_max.append(MIN)
            weights.append(c.weight)
            columns.append(c.index)

        self.data_topsis = Data(candidates[columns].values.tolist(),
                                min_max,
                                weights=weights,
                                anames=names,
                                cnames=columns)
        model = closeness.TOPSIS()
        choice = model.decide(self.data_topsis)
        # Return must be a list
        return [candidates.index[int(choice.best_alternative_)]]

    def print(self):
        if self.data_topsis is not None:
            self.data_topsis.plot()
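# A minimal usage sketch of TopsisEvaluator. The original criterion class is
# not shown, so `Criterion` below is a hypothetical namedtuple exposing only
# the attributes used above (index, weight, maximize); BaseEvaluator, Data,
# MAX, MIN and closeness are assumed importable as in the class definition.
from collections import namedtuple

import pandas as pd

Criterion = namedtuple("Criterion", ["index", "weight", "maximize"])

candidates = pd.DataFrame({"cost": [10, 20, 15], "speed": [3, 9, 6]})
names = pd.Series(["a", "b", "c"])

evaluator = TopsisEvaluator(candidate_names=names)
best = evaluator.get_optimal_candidates(
    candidates,
    [Criterion("cost", 0.4, False), Criterion("speed", 0.6, True)])
print(best)  # e.g. [1] -- index label of the best-ranked candidate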
def get_ranking():
    # Use a service account
    cred = credentials.Certificate(
        'merlin-c4fa7-firebase-adminsdk-7cfbn-8323034d25.json')
    firebase_admin.initialize_app(cred)
    db = firestore.client()

    user_ref = db.collection(u'users')
    docs = user_ref.stream()

    # Build a dataframe from the Firestore documents
    user_list = [doc.to_dict() for doc in docs]
    users_data = pd.DataFrame(user_list)
    users_data = users_data.loc[:, ['ID', 'skills_score', 'work_experience',
                                    'rating', 'origin']]

    criteria_data = Data(
        users_data.iloc[:, 1:],         # the pandas dataframe
        [MAX, MAX, MAX, MIN],           # direction of goodness for each column
        anames=users_data['ID'],        # each entity's name, here userId
        cnames=users_data.columns[1:],  # attribute/column name
        # weights=[1, 1, 1, 1]          # weights for each attribute (optional)
    )

    dm = simple.WeightedSum(mnorm="sum")
    dec = dm.decide(criteria_data)

    # attach the WeightedSum rank to each user and return everything as JSON
    users_data['rank'] = dec.rank_
    return jsonify(users_data.to_dict(orient='records'))
def leaderboard():
    s_name = []
    s_roll = []
    name = []
    clarity = []
    Brightness = []
    Pixel = []
    Contrast = []
    Resolution = []
    Vignette = []
    for i in user_data.find():
        s_name.append(i['Name'])
        s_roll.append(i['Roll_No'])
        name.append(i['Image_Name'])
        clarity.append(float(i['clarity']))
        Brightness.append(float(i['Brightness']))
        Pixel.append(float(i['Pixel']))
        Contrast.append(float(i['Contrast']))
        Resolution.append(float(i['Resolution']))
        Vignette.append(float(i['Vignette']))

    df = pd.DataFrame({
        'image_name': name,
        'clarity': clarity,
        'Brightness': Brightness,
        'Pixel': Pixel,
        'Contrast': Contrast,
        'Resolution': Resolution,
        'Vignette': Vignette
    })

    # Every criterion is maximized except Vignette, which is minimized.
    # Criteria and column names follow the dataframe column order.
    criteria = [MAX, MAX, MAX, MAX, MAX, MIN]
    ds = np.array(df)
    ds1 = ds[:, 1:]
    data = Data(ds1,
                criteria,
                weights=[1.0 / 6] * 6,
                anames=ds[:, 0],  # image names
                cnames=["clarity", "Brightness", "Pixel",
                        "Contrast", "Resolution", "Vignette"])

    t = closeness.TOPSIS()
    dec = t.decide(data)
    rank = dec.rank_
    y = rank.astype(int)
    topsis_score = dec.e_.closeness
    name = ds[:, 0]

    result = np.array([s_name, s_roll, y, topsis_score]).T
    final = result[result[:, 2].argsort()]
    return render_template("leaderboard.html", result=final)
def execute(self):
    alternative_names = self._X.columns.tolist()
    criterion_names = list(self._key_features.keys())
    criteria = [MAX for i in criterion_names]
    # uniform weight for every criterion
    weights = [1 / len(criterion_names) for i in range(len(criterion_names))]

    df = pd.DataFrame(self._key_features, index=alternative_names)
    data = Data(df.values,
                criteria,
                weights,
                anames=df.index.tolist(),
                cnames=df.columns)

    #if self._verbose:
    #    data.plot("radar")

    dm1 = simple.WeightedSum()
    dm2 = simple.WeightedProduct()
    dm3 = closeness.TOPSIS()

    dec1 = dm1.decide(data)
    dec2 = dm2.decide(data)
    dec3 = dm3.decide(data)

    ranks = [dec1.rank_, dec2.rank_, dec3.rank_]
    self._ensemble_results = pd.DataFrame(
        {
            "TOPSIS": dec3.rank_,
            "WeightedSum": dec1.rank_,
            "WeightedProduct": dec2.rank_
        },
        index=df.index.tolist())

    # Only keep features that our decision makers deemed in the top % specified
    num_features_requested = math.ceil(
        len(alternative_names) * self._featurePercentage)
    sum_ranks = sum(ranks)
    argmin_sorted = np.argpartition(sum_ranks, num_features_requested)

    self._kept_features = []
    count = 0
    for i in argmin_sorted:
        self._kept_features.append(alternative_names[i])
        count += 1
        if count >= num_features_requested:
            break

    if self._verbose:
        print("", self._featurePercentage * 100,
              " % -> (" + str(num_features_requested) + ") features kept.")
        print(self._kept_features)

    return self._ensemble_results, self._kept_features
def perform_topsis(raw_data, survey_data):
    SITE_ROOT = os.path.dirname(os.path.realpath(__file__))
    df = pd.read_csv(SITE_ROOT + "/data/cleaned_data.csv")

    matrix = []
    ids = []

    # for matrix:
    # overall weather diff avg | crimerate | nightlife score
    for school in raw_data['schools']:
        cur_id = school['id']
        ids.append(cur_id)

        winter_temp = float(df.loc[df['UNITID'] == np.int64(int(cur_id)),
                                   'WINTER_TAVG'].iloc[0])
        spring_temp = float(df.loc[df['UNITID'] == np.int64(int(cur_id)),
                                   'SPRING_TAVG'].iloc[0])
        summer_temp = float(df.loc[df['UNITID'] == np.int64(int(cur_id)),
                                   'SUMMER_TAVG'].iloc[0])
        fall_temp = float(df.loc[df['UNITID'] == np.int64(int(cur_id)),
                                 'FALL_TAVG'].iloc[0])

        winter_diff = abs(float(survey_data['winter']) - winter_temp)
        spring_diff = abs(float(survey_data['spring']) - spring_temp)
        summer_diff = abs(float(survey_data['summer']) - summer_temp)
        fall_diff = abs(float(survey_data['fall']) - fall_temp)
        diff = (winter_diff + spring_diff + summer_diff + fall_diff) / 4.0

        max_crimerate = float(df['CRIME_COUNT'].max())
        crimerate = float(df.loc[df['UNITID'] == np.int64(int(cur_id)),
                                 'CRIME_COUNT'].iloc[0])

        matrix.append([
            diff,
            crimerate / max_crimerate,
            get_bar_data(school['lat'], school['lon'])
        ])
    # print(matrix)

    criteria = [MIN, MIN, MAX]
    data = Data(matrix,
                criteria,
                weights=[
                    float(raw_data['weather']['importance']),
                    float(raw_data['crime']['importance']),
                    float(raw_data['nightlife']['importance'])
                ],
                anames=ids,
                cnames=["weather", "crime", "nightlife"])

    analysis = closeness.TOPSIS()
    res = analysis.decide(data)
    # print(res)

    rank_list = res.rank_.tolist()
    sorted_ids = [None] * len(ids)
    for i in range(len(rank_list)):
        sorted_ids[rank_list[i] - 1] = int(ids[i])
    # print(sorted_ids)
    return sorted_ids
def getBestRankedCandidate(nbToExtract, candidateSet, rulesSetsQuality,
                           criteriaQuality):
    # print(str(candidateSet))
    # print(str(rulesSetsCandidates))
    tmpRulesSetsQuality = None
    criteria = []
    idAttributesToRemove = []
    for idCrit in range(len(criteriaQuality)):
        if criteriaQuality[idCrit] == '+':
            criteria.append(MAX)
        elif criteriaQuality[idCrit] == '-':
            criteria.append(MIN)
        else:
            idAttributesToRemove.append(idCrit)

    attributes = removeAttributeId(
        ['Polarity', 'Diversity', 'Distancing', 'Surprise'],
        idAttributesToRemove)

    if len(attributes) < 4:
        tmpRulesSetsQuality = rulesSetsQuality
        rulesSetsQuality = []
        for ruleQI in tmpRulesSetsQuality:
            newQI = removeAttributeId(ruleQI, idAttributesToRemove)
            rulesSetsQuality.append(newQI)

    print(str(attributes))

    candidateIds = [i for i in range(len(candidateSet))]
    data = Data(rulesSetsQuality,
                criteria,
                anames=candidateIds,
                cnames=attributes)

    # apply a simple weighted sums method
    dm = simple.WeightedSum()
    res = dm.decide(data)
    current_ranking = res.rank_

    # extract the best candidates
    if nbToExtract > len(current_ranking):
        nbToExtract = len(current_ranking)

    bestCandidates = []
    for i in range(nbToExtract):
        bestCandidateId = list(current_ranking).index(i + 1)
        bestCandidate = candidateSet[bestCandidateId]
        if tmpRulesSetsQuality is None:
            bestCandidateQI = rulesSetsQuality[bestCandidateId]
        else:
            bestCandidateQI = tmpRulesSetsQuality[bestCandidateId]
        bestCandidates.append((bestCandidate, bestCandidateQI))
    return bestCandidates
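# A hypothetical call to getBestRankedCandidate: three candidate rule sets
# scored on the four quality attributes, with Surprise excluded via a
# non '+'/'-' marker. removeAttributeId is assumed to be defined elsewhere
# and to drop the listed attribute indices; the values are made up.
candidates = ["rules_A", "rules_B", "rules_C"]
quality = [[0.6, 0.4, 0.7, 0.1],
           [0.8, 0.3, 0.5, 0.9],
           [0.5, 0.9, 0.6, 0.4]]
best = getBestRankedCandidate(2, candidates, quality, ['+', '+', '-', '0'])
for candidate, qi in best:
    print(candidate, qi)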
def setUp(self):
    # Data From:
    # Munier, N., Carignano, C., & Alberto, C.
    # UN MÉTODO DE PROGRAMACIÓN MULTIOBJETIVO.
    # Revista de la Escuela de Perfeccionamiento en Investigación
    # Operativa, 24(39).
    self.data = Data(
        mtx=[[250, 120, 20, 800],
             [130, 200, 40, 1000],
             [350, 340, 15, 600]],
        criteria=[max, max, min, max],
        anames=["Proyecto 1", "Proyecto 2", "Proyecto 3"],
        cnames=["Criterio 1", "Criterio 2", "Criterio 3", "Criterio 4"])
    self.b = [None, 500, None, None]
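# A minimal companion test for the fixture above. It is a sketch only: it
# assumes the method lives in the same unittest.TestCase as setUp, that the
# old skcriteria (<1.0) API is in use, and that the three projects do not tie.
from skcriteria.madm import simple

def test_weighted_sum_rank(self):
    dm = simple.WeightedSum()
    dec = dm.decide(self.data)
    # one rank position per alternative, covering positions 1..3
    self.assertEqual(len(dec.rank_), 3)
    self.assertCountEqual(dec.rank_.tolist(), [1, 2, 3])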
def decide(mtx, alternatives, weights):
    criteria_names = [
        'control_of_corruption', 'political_stability', 'inflation_rate',
        'gdp_growth', 'ease_of_doing_business', 'unemployment_rate',
        'individuals_using_the_internet'
    ]
    _criteria = [MAX, MAX, MIN, MAX, MAX, MIN, MAX]
    data = Data(mtx=mtx,
                criteria=_criteria,
                weights=weights,
                anames=alternatives,
                cnames=criteria_names)
    topsis = TOPSIS()
    decision = topsis.decide(data)
    return decision.rank_
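# A small, hypothetical invocation of decide(). The country names and the
# 7-column matrix are made-up values, and TOPSIS is assumed to be
# skcriteria.madm.closeness.TOPSIS imported at module level.
mtx = [
    # corruption, stability, inflation, gdp, ease, unemployment, internet
    [55, 60, 4.2, 2.1, 70, 6.5, 80],
    [40, 45, 9.8, 5.4, 55, 9.1, 60],
    [70, 72, 2.5, 1.2, 82, 4.3, 90],
]
weights = [1, 1, 1, 2, 2, 1, 1]
ranking = decide(mtx, ["Country A", "Country B", "Country C"], weights)
print(ranking)  # rank position of each alternative, e.g. array([2, 3, 1])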
def rank_label(df_lab, wgt):
    criteria_data = Data(
        df_lab.iloc[:, 1:5],         # the pandas dataframe
        [MIN, MIN, MIN, MIN],        # direction of goodness for each column
        anames=df_lab['Index'],      # each entity's name
        cnames=df_lab.columns[1:5],  # attribute/column name
        weights=wgt                  # weights for each attribute (optional)
    )

    df_lab_copy = df_lab.copy()

    # weighted sum, sumNorm
    dm = simple.WeightedSum(mnorm="sum")
    dec = dm.decide(criteria_data)
    df_lab_copy.loc[:, 'rank_weightedSum_sumNorm_inverse'] = dec.rank_

    # weighted sum, maxNorm
    dm = simple.WeightedSum(mnorm="max")
    dec = dm.decide(criteria_data)
    df_lab_copy.loc[:, 'rank_weightedSum_maxNorm_inverse'] = dec.rank_

    # weighted product, sumNorm
    dm = simple.WeightedProduct(mnorm="sum")
    dec = dm.decide(criteria_data)
    df_lab_copy.loc[:, 'rank_weightedProduct_sumNorm_inverse'] = dec.rank_

    # weighted product, maxNorm
    dm = simple.WeightedProduct(mnorm="max")
    dec = dm.decide(criteria_data)
    df_lab_copy.loc[:, 'rank_weightedProduct_maxNorm_inverse'] = dec.rank_

    # sort for better visualization
    df_lab_copy.sort_values(by=['rank_weightedSum_sumNorm_inverse'],
                            inplace=True)
    return df_lab_copy
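# A hypothetical call to rank_label(): four cost-type attributes (all
# minimized, matching the MIN criteria above) with equal weights. The column
# names and values are made up for illustration.
import pandas as pd

df_lab = pd.DataFrame({
    "Index": ["A", "B", "C"],
    "price": [200, 150, 180],
    "weight": [12, 15, 10],
    "delay": [5, 3, 4],
    "defects": [2, 1, 3],
})
ranked = rank_label(df_lab, [0.25, 0.25, 0.25, 0.25])
print(ranked[["Index", "rank_weightedSum_sumNorm_inverse"]])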
def calculate(priority, products):
    new_scores = {}
    scores = products['scores']
    product_ids = list(scores.keys())
    if len(product_ids) == 0:
        return

    first_product_id = product_ids[0]
    entity = list(scores[first_product_id].keys())
    criteria = [MAX] * len(entity)

    matrix = Topsis.create_matrix(scores)
    priority = Topsis.reshape_priority(priority)

    data = Data(matrix,
                criteria,
                weights=priority,
                anames=product_ids,
                cnames=entity)

    dm = closeness.TOPSIS(mnorm="sum")
    decision = dm.decide(data)
    ranks = decision.rank_.tolist()

    for idx in range(len(ranks)):
        pid = product_ids[idx]
        new_scores[pid] = {
            'pid': pid,
            'rank': ranks[idx],
            'attrs': scores[pid]
        }

    products['scores'] = new_scores
    return decision
def calculate(id):
    # fetch & preprocess project
    project = mongo.db.projects.find_one_or_404({"id": id})
    projectCharacteristics = {p["id"]: p for p in project["characteristics"]}
    cnames = [*projectCharacteristics]

    # fetch & preprocess characteristics
    cursor = mongo.db.characteristics.find({"id": {"$in": cnames}})
    characteristics = {d["id"]: d for d in cursor}

    # fetch & preprocess method chunks
    cursor = mongo.db.methodchunks.find(
        {"characteristics.id": {"$in": cnames}})
    method_chunks = {}
    for document in cursor:
        document["characteristics"] = {
            d["id"]: d for d in document["characteristics"]
        }
        method_chunks[document["id"]] = document

    # create encoder
    from sklearn.preprocessing import OrdinalEncoder
    for cid, pc in projectCharacteristics.items():
        if pc["rule"] == "preference_list":
            pass
        elif pc["rule"] == "exact":
            pass
        else:  # maximum, minimum
            if cid in characteristics:
                for cv in characteristics[cid]["characteristicValues"]:
                    if cv["ref"] == pc["ref"]:
                        if pc["rule"] == "maximum":
                            pc["value"] = list(reversed(cv["values"]))
                        else:
                            pc["value"] = cv["values"]
                        break
        values = pc["value"] + ["N/A"]
        values.reverse()  # ordinal values in ascending order (smallest to largest)
        enc = OrdinalEncoder(categories=[values])
        enc.fit([[v] for v in values])
        pc["encoder"] = enc

    # build mtx
    import pandas as pd
    df = pd.DataFrame([], columns=cnames)
    for mid, m in method_chunks.items():
        obj = {}
        for cid, pc in projectCharacteristics.items():
            if cid in m["characteristics"]:
                if pc["ref"] == m["characteristics"][cid]["ref"]:
                    obj[cid] = m["characteristics"][cid]["value"]
        df = df.append(pd.Series(obj, index=df.columns, name=mid))
    df.fillna("N/A", inplace=True)
    print(df)
    separator()

    if len(df.index.values) == 0:
        return "No match"

    # apply encoding
    encoded = df.copy()
    for key, value in encoded.items():
        values = [
            v if v in projectCharacteristics[key]["value"] else "N/A"
            for v in value
        ]
        encoded.loc[:, key] = projectCharacteristics[key]["encoder"].transform(
            [[v] for v in values])
    encoded = encoded.loc[:, (encoded != 0).any(axis=0)]

    # construct the decision data
    from skcriteria import Data, MAX
    from skcriteria.madm import simple, closeness

    optimal_senses = []
    weights = []
    for cid, pc in encoded.items():
        optimal_senses.append(MAX)
        weights.append(projectCharacteristics[cid].get("weight", 1))

    data = Data(encoded.values,
                optimal_senses,
                weights=weights,
                anames=encoded.index,
                cnames=encoded.columns)

    # WeightedSum
    model = simple.WeightedSum(mnorm="vector", wnorm="sum")
    de = model.decide(data)
    print(de)
    separator()
    print(de.e_)
    print("Points:", de.e_.points)

    # TOPSIS
    model2 = closeness.TOPSIS(mnorm="vector", wnorm="sum")
    de2 = model2.decide(data)
    print(de2)
    separator()
    print(de2.e_)
    print("Ideal:", de2.e_.ideal)
    print("Anti-Ideal:", de2.e_.anti_ideal)
    print("Closeness:", de2.e_.closeness)

    # build response
    res = {}
    for cid, mc in method_chunks.items():
        mc["characteristics"] = [c for cid, c in mc["characteristics"].items()]

    z = [{
        "methodChunk": method_chunks[de._data._anames[i]],
        "score": de.e_.points[i],
        "rank": int(de._rank[i])
    } for i in range(0, len(de.mtx))]
    z2 = [{
        "methodChunk": method_chunks[de2._data._anames[i]],
        "score": de2.e_.closeness[i],
        "rank": int(de2._rank[i])
    } for i in range(0, len(de2.mtx))]

    res["results"] = [{
        "model": "WeightedSum",
        "values": sorted(z, key=lambda x: x["rank"])
    }, {
        "model": "TOPSIS",
        "values": sorted(z2, key=lambda x: x["rank"])
    }]

    for cid, pc in projectCharacteristics.items():
        pc.pop("_id", None)
        pc["encoder"] = pc["encoder"].categories[0]

    project["characteristics"] = [
        pc for cid, pc in projectCharacteristics.items()
    ]
    project.pop("_id", None)
    res["project"] = project

    default = lambda o: f"<<non-serializable: {type(o).__qualname__}>>"
    result = json.loads(json_util.dumps(res, default=default))
    return result
# connect to the collection
collection = db.sales
mongo = collection.find()
result = pd.DataFrame(list(mongo))

# build the decision data
criteria = [MAX, MIN, MIN, MAX, MAX]
alt = result.loc[0:100, ['Quantity', 'NormalPrice', 'LenghtDiscount',
                         'Revenue', 'AcceptDiscount']]
mtx = alt.values
anames = result.loc[0:100, 'Description'].values
cnames = list(alt)
weights = [0.3, 0.1, 0.2, 0.3, 0.1]

data = Data(mtx, criteria, weights=weights, anames=anames, cnames=cnames)

dm = simple.WeightedSum()
dec = dm.decide(data)
# dtp = round(dec.e_.points, 2)
dt = zip(dec.data.anames, dec._rank, dec.e_.points)

import os

file_path = "C:/AppBoreal/"
directory = os.path.dirname(file_path)


def savetoxls():
def loop(self):
    """Periodic job."""
    if self.__mcda_descriptor is not None and self.__active:
        # Step 1: creating structure to handle all metrics
        self.create_mcda_structure()

        # Step 2: update WTP/LVAP association map
        self.update_wtp_association_map()

        # Step 3: for each criterion, get all metrics and populate the structure
        for crr_criteria in self.__mcda_descriptor['criteria']:
            if crr_criteria == 'wtp_load_measured_mbps':
                if not self.get_wtp_load_measurements():
                    return
            elif crr_criteria == 'wtp_queue_delay_ms':
                if not self.get_wtp_queue_delay_measurements():
                    return
            elif crr_criteria == 'wtp_channel_load_rate':
                if not self.get_wtp_channel_load_measurements():
                    return
            elif crr_criteria == 'wtp_sta_rssi_dbm':
                if not self.get_lvap_rssi_measurements():
                    return
            elif crr_criteria == 'wtp_load_expected_mbps':
                self.initialize_wtp_load_expected()
            elif crr_criteria == 'sta_association_flag':
                self.get_sta_association_flag()

        # Step 4: get all flows from the flow manager APP
        if self.get_flow_handler():
            if self.__flow_handler['flows'] is not None:
                # Step 5: compute WTP expected load if present in the criteria
                if 'wtp_load_expected_mbps' in self.__mcda_descriptor['criteria']:
                    self.compute_wtp_load_expected_mbps()

                # Step 6: for each LVAP in the network, get a decision using the TOPSIS method
                for lvap in self.lvaps():
                    crr_lvap_addr = str(lvap.addr)

                    # Create MCDA structure
                    mtx = []
                    wtp_addresses = []
                    for crr_wtp_addr in self.__mcda_handover_manager['wtps']:
                        wtp_addresses.append(crr_wtp_addr)
                        if crr_lvap_addr in self.__mcda_handover_manager['wtps'][crr_wtp_addr]['lvaps']:
                            mtx.append(
                                self.__mcda_handover_manager['wtps'][crr_wtp_addr]
                                ['lvaps'][crr_lvap_addr]['metrics']['values'])

                    # If any of the (active) flows in this LVAP is QoS, use the QoS
                    # weights; otherwise, stick with the BE weights
                    mcda_weights = self.__mcda_descriptor['weights_be']
                    if crr_lvap_addr in self.__flow_handler['lvap_flow_map']:
                        if any(i in self.__flow_handler['lvap_flow_map'][crr_lvap_addr]
                               for i in self.__flow_handler['qos_flows']):
                            mcda_weights = self.__mcda_descriptor['weights_qos']

                    # Lists must have the same length
                    data = Data(mtx,
                                self.__mcda_targets,
                                weights=mcda_weights,
                                anames=wtp_addresses,
                                cnames=self.__mcda_descriptor['criteria'])

                    dm = closeness.TOPSIS()
                    dec = dm.decide(data)
                    best_alternative_wtp_addr = data.anames[dec.best_alternative_]

                    if self.__db_monitor:
                        for i in range(0, len(mtx)):
                            closeness_list = dec.e_.closeness.tolist()
                            ranks = dec.rank_.tolist()
                            closeness_res = closeness_list[i]
                            if math.isnan(closeness_res):
                                closeness_res = None
                            fields = (['LVAP_ADDR', 'WTP_ADDR'] +
                                      self.__mcda_descriptor['criteria'] +
                                      ['RANK', 'CLOSENESS'])
                            values = ([crr_lvap_addr, wtp_addresses[i]] + mtx[i] +
                                      [ranks[i], closeness_res])
                            # Saving into db
                            self.monitor.insert_into_db(table='mcda_results',
                                                        fields=fields,
                                                        values=values)

                    # # TODO: Improve writing info...
                    # f = open(self.__mcda_results_filename, 'w+')
                    # f.write('Decision for LVAP: ' + crr_lvap_addr + '\n' +
                    #         str(dec) + '\nMove to WTP: ' +
                    #         best_alternative_wtp_addr + '\n')
                    # f.close()

                    # Step 7: is handover needed? Do it and set the flag to 0 for
                    # all other blocks (this could be improved, but get block with
                    # a given address should be implemented)
                    if 'sta_association_flag' in self.__mcda_descriptor['criteria']:
                        sta_association_index = \
                            self.__mcda_descriptor['criteria'].index('sta_association_flag')

                    old_wtp_addr = None
                    for block in self.blocks():
                        crr_wtp_addr = str(block.addr)
                        if lvap.blocks[0] is not None:
                            if crr_wtp_addr == best_alternative_wtp_addr:
                                # Do handover to this block only if the station is
                                # not already connected to it
                                sta_crr_wtp_addr = str(lvap.blocks[0].addr)
                                if sta_crr_wtp_addr != best_alternative_wtp_addr:
                                    self.log.info("Handover triggered!")
                                    old_wtp_addr = sta_crr_wtp_addr
                                    # Handover now..
                                    lvap.blocks = block
                                # and update metrics
                                if 'sta_association_flag' in self.__mcda_descriptor['criteria']:
                                    self.__mcda_handover_manager['wtps'][crr_wtp_addr] \
                                        ['lvaps'][crr_lvap_addr]['metrics'] \
                                        ['values'][sta_association_index] = 1
                            elif 'sta_association_flag' in self.__mcda_descriptor['criteria']:
                                self.__mcda_handover_manager['wtps'][crr_wtp_addr] \
                                    ['lvaps'][crr_lvap_addr]['metrics'] \
                                    ['values'][sta_association_index] = 0

                    # Recalculate WTP expected load on handover, if any...
                    # OBS: not possible to access lvap.blocks[0] while performing handover
                    if 'wtp_load_expected_mbps' in self.__mcda_descriptor['criteria']:
                        if old_wtp_addr is not None:
                            self.recalculate_wtp_load_expected_mbps(
                                old_wtp_addr=old_wtp_addr,
                                best_alternative_wtp_addr=best_alternative_wtp_addr,
                                moving_lvap_addr=crr_lvap_addr)

                # Start considering association and expected load from now on...
                if self.__initial_association:
                    self.__initial_association = False

        if self.__db_monitor is not None:
            fields = self.__mcda_descriptor['criteria'] + ['TYPE']
            values = self.__mcda_descriptor['weights_qos'] + ['QoS']
            # Saving into db
            self.monitor.insert_into_db(table='mcda_weights',
                                        fields=fields,
                                        values=values)

            fields = self.__mcda_descriptor['criteria'] + ['TYPE']
            values = self.__mcda_descriptor['weights_be'] + ['BE']
            # Saving into db
            self.monitor.insert_into_db(table='mcda_weights',
                                        fields=fields,
                                        values=values)

            # Keep only the last measurements in the db
            self.monitor.keep_last_measurements_only('mcda_association_stats')
            self.monitor.keep_last_measurements_only('mcda_results')
            self.monitor.keep_last_measurements_only('mcda_weights')
patent_data[['similarity_value']] = pd.DataFrame(np.array([0.86, 0.83, 0.86, 0.86]))
print(patent_data)
print("+++++++")

patent_data = patent_data.drop('priority_date_year', axis=1)
print(patent_data)
print(patent_data.iloc[:, 1:5])
print(patent_data.columns)

# project the goodness for each column and assign weights to the attributes
criteria_data = Data(patent_data.iloc[:, 1:7],
                     [MAX, MAX, MAX, MAX, MAX, MAX],
                     anames=patent_data['patent_number'],
                     cnames=patent_data.columns[1:7],
                     weights=[0.1, 0.3, 0.1, 0.1, 0.1, 0.3])
print(criteria_data)


def normalize_data(logic):
    df = patent_data.iloc[:, 1:6].values.copy()
    if logic == "minmax":
        normalized_data = minmax_scale(df)
        # normalized_data[:, 6] = 1 - normalized_data[:, 6]  # for the min feature
    elif logic == "sumNorm":
        normalized_data = df / df.sum(axis=0)
        # normalized_data[:, 6] = 1 / normalized_data[:, 6]
    elif logic == "maxNorm":
def flow(self,
         models_to_flow=[],
         params=None,
         test_size=0.2,
         nfolds=3,
         nrepeats=3,
         n_jobs=1,
         metrics=[],
         verbose=False,
         regressors=True,
         ensemble=False,
         featurePercentage=0.25):

    # Enforce parameters
    assert isinstance(nfolds, int), "nfolds must be integer"
    assert isinstance(nrepeats, int), "nrepeats must be integer"
    assert isinstance(n_jobs, int), "n_jobs must be integer"
    assert isinstance(verbose, bool), "verbose must be bool"
    assert isinstance(params, dict), "params must be a dict"
    assert isinstance(test_size, float), "test_size must be a float"
    assert isinstance(metrics, list), "model scoring must be a list"
    assert isinstance(regressors, bool), "regressors must be bool"
    assert isinstance(ensemble, bool), "ensemble must be bool"

    # Enforce logic for regressors
    #if regressors:
    #    assert not any(["c" in k.split("__") for k, v in params.items()]), \
    #        "You selected classifiers with the regressors flag true. Comon' man!"
    #else:
    #    assert not any(["r" in k.split("__") for k, v in params.items()]), \
    #        "You selected regressors with the regressors flag false. Comon' man!"

    self._nfolds = nfolds
    self._nrepeats = nrepeats
    self._n_jobs = n_jobs
    self._verbose = verbose
    self._allParams = params
    self._metrics = metrics
    self._test_size = test_size
    self._regressors = regressors
    self._ensemble = ensemble
    self._featurePercentage = featurePercentage

    # Inform the user of the streamline.
    stringbuilder = ""
    for thing in models_to_flow:
        stringbuilder += thing
        stringbuilder += " --> "

    if self._verbose:
        if self._regressors:
            print("*************************")
            print("=> (Regressor) " + "=> Feature Selection Streamline: " +
                  stringbuilder[:-5])
            print("*************************")
        elif self._regressors == False:
            print("*************************")
            print("=> (Classifier) " + "=> Feature Selection Streamline: " +
                  stringbuilder[:-5])
            print("*************************")
        else:
            print("Invalid model selected. Please set regressors=True or regressors=False.")
            print()
    def supportVectorRegression():
        self._svr_params = {}
        for k, v in self._allParams.items():
            if "svr" in k:
                self._svr_params[k] = v

        self._svr_params["svr__kernel"] = ['linear']
        model = SupportVectorRegressorPredictiveModel(
            self._X_train, self._y_train, self._svr_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().coef_.flatten())

    def randomForestRegression():
        self._rfr_params = {}
        for k, v in self._allParams.items():
            if "rfr" in k:
                self._rfr_params[k] = v

        model = RandomForestRegressorPredictiveModel(
            self._X_train, self._y_train, self._rfr_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().feature_importances_.flatten())

    def adaptiveBoostingRegression():
        self._abr_params = {}
        for k, v in self._allParams.items():
            if "abr" in k:
                self._abr_params[k] = v

        model = AdaptiveBoostingRegressorPredictiveModel(
            self._X_train, self._y_train, self._abr_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().feature_importances_.flatten())

    def lassoRegression():
        self._lasso_params = {}
        for k, v in self._allParams.items():
            if "lasso" in k:
                self._lasso_params[k] = v

        model = LassoRegressorPredictiveModel(self._X_train, self._y_train,
                                              self._lasso_params, self._nfolds,
                                              self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().coef_.flatten())

    def elasticNetRegression():
        self._enet_params = {}
        for k, v in self._allParams.items():
            if "enet" in k:
                self._enet_params[k] = v

        model = ElasticNetRegressorPredictiveModel(
            self._X_train, self._y_train, self._enet_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().coef_.flatten())

    def mixed_selection():
        if self._verbose:
            print("Executing: mixed_selection")

        X = self._X
        y = self._y

        initial_list = []
        threshold_in_specified = False
        threshold_out_specified = False

        if "mixed_selection__threshold_in" in self._allParams.keys():
            assert isinstance(self._allParams["mixed_selection__threshold_in"],
                              float), "threshold_in must be a float"
            threshold_in = self._allParams["mixed_selection__threshold_in"]
            threshold_in_specified = True
        else:
            threshold_in = 0.01

        if "mixed_selection__threshold_out" in self._allParams.keys():
            assert isinstance(self._allParams["mixed_selection__threshold_out"],
                              float), "threshold_out must be a float"
            threshold_out = self._allParams["mixed_selection__threshold_out"]
            threshold_out_specified = True
        else:
            threshold_out = 0.05

        if "mixed_selection__verbose" in self._allParams.keys():
            assert isinstance(self._allParams["mixed_selection__verbose"],
                              bool), "verbose must be a bool"
            verbose = self._allParams["mixed_selection__verbose"]
        else:
            verbose = False

        if threshold_in_specified and threshold_out_specified:
            assert threshold_in < threshold_out, \
                "threshold_in must be strictly less than threshold_out to avoid infinite looping."

        """ Perform a forward-backward feature selection
        based on p-value from statsmodels.api.OLS
        Arguments:
            X - pandas.DataFrame with candidate features
            y - list-like with the target
            initial_list - list of features to start with (column names of X)
            threshold_in - include a feature if its p-value < threshold_in
            threshold_out - exclude a feature if its p-value > threshold_out
            verbose - whether to print the sequence of inclusions and exclusions
        Returns: list of selected features
        Always set threshold_in < threshold_out to avoid infinite looping.
        See https://en.wikipedia.org/wiki/Stepwise_regression for the details
        """

        included = list(initial_list)
        while True:
            changed = False

            # forward step
            excluded = list(set(X.columns) - set(included))
            new_pval = pd.Series(index=excluded)
            for new_column in excluded:
                model = sm.OLS(
                    y,
                    sm.add_constant(pd.DataFrame(X[included +
                                                   [new_column]]))).fit()
                new_pval[new_column] = model.pvalues[new_column]
            best_pval = new_pval.min()
            if best_pval < threshold_in:
                best_feature = new_pval.idxmin()
                included.append(best_feature)
                changed = True
                if verbose:
                    print('Adding  {:30} with p-value {:.6}'.format(
                        best_feature, best_pval))

            # backward step
            model = sm.OLS(y, sm.add_constant(pd.DataFrame(X[included]))).fit()
            # use all coefs except intercept
            pvalues = model.pvalues.iloc[1:]
            worst_pval = pvalues.max()  # null if pvalues is empty
            if worst_pval > threshold_out:
                changed = True
                worst_feature = pvalues.idxmax()
                included.remove(worst_feature)
                if verbose:
                    print('Dropping {:30} with p-value {:.6}'.format(
                        worst_feature, worst_pval))

            if not changed:
                break

        new_included = []
        for col in X.columns:
            if col in included:
                new_included.append(1)
            else:
                new_included.append(0)

        return new_included

    def partialLeastSquaresRegression():
        if self._verbose:
            print("Executing: plsr")

        # The components are not helpful for this context.
        # They might be for transformation, however.
        #if "plsr__n_components" in self._allParams.keys():
        #    n_components = self._allParams["plsr__n_components"]
        #else:
        #    n_components = 2

        pls_model = PLSRegression()
        pls_out = pls_model.fit(self._X, self._y)
        # The coefficients are used to show the direction of the relationship
        return abs(pls_out.coef_.flatten())

    ############################################
    ########## Classifiers Start Here ##########
    ############################################

    def adaptiveBoostingClassifier():
        self._abc_params = {}
        for k, v in self._allParams.items():
            if "abc" in k:
                self._abc_params[k] = v

        model = AdaptiveBoostingClassifierPredictiveModel(
            self._X_train, self._y_train, self._abc_params, self._nfolds,
            self._n_jobs, self._verbose)
        return model.getBestEstimator().feature_importances_.flatten()

    def randomForestClassifier():
        self._rfc_params = {}
        for k, v in self._allParams.items():
            if "rfc" in k:
                self._rfc_params[k] = v

        model = RandomForestClassifierPredictiveModel(
            self._X_train, self._y_train, self._rfc_params, self._nfolds,
            self._n_jobs, self._verbose)
        return model.getBestEstimator().feature_importances_.flatten()

    def supportVectorClassifier():
        self._svc_params = {}
        for k, v in self._allParams.items():
            if "svc" in k:
                self._svc_params[k] = v

        self._svc_params["svc__kernel"] = ['linear']
        model = SupportVectorClassifierPredictiveModel(
            self._X_train, self._y_train, self._svc_params, self._nfolds,
            self._n_jobs, self._verbose)

        coefs = model.getBestEstimator().coef_
        prods = coefs[0, :]
        for i in range(1, len(coefs)):
            prods = np.multiply(prods, coefs[i, :])
        return abs(prods)

    # Valid regressors
    regression_options = {
        "mixed_selection": mixed_selection,
        "svr": supportVectorRegression,
        "rfr": randomForestRegression,
        "abr": adaptiveBoostingRegression,
        "lasso": lassoRegression,
        "enet": elasticNetRegression,
        "plsr": partialLeastSquaresRegression
    }

    # Valid classifiers
    classification_options = {
        'abc': adaptiveBoostingClassifier,
        'rfc': randomForestClassifier,
        'svc': supportVectorClassifier
    }

    # Define return dictionary
    return_dict = {}

    # Train test split
    self._X_train, self._X_test, self._y_train, self._y_test = \
        train_test_split(self._X, self._y, test_size=self._test_size)
    # Wrapper models
    self._key_features = {}

    if self._regressors:
        for key in models_to_flow:
            self._key_features[key] = regression_options[key]()
    elif self._regressors == False:
        for key in models_to_flow:
            self._key_features[key] = classification_options[key]()
    else:
        print("Invalid model type. Please set regressors=True or regressors=False.")
        print()

    if self._verbose:
        print()

    return_dict['feature_importances'] = self._key_features

    self._ensemble_results = None
    self._kept_features = None
    if self._ensemble:
        alternative_names = self._X.columns.tolist()
        criterion_names = list(self._key_features.keys())
        criteria = [MAX for i in criterion_names]
        # uniform weight for every criterion
        weights = [1 / len(criterion_names)
                   for i in range(len(criterion_names))]

        df = pd.DataFrame(self._key_features, index=alternative_names)
        data = Data(df.values,
                    criteria,
                    weights,
                    anames=df.index.tolist(),
                    cnames=df.columns)

        #if self._verbose:
        #    data.plot("radar")

        dm1 = simple.WeightedSum()
        dm2 = simple.WeightedProduct()
        dm3 = closeness.TOPSIS()

        dec1 = dm1.decide(data)
        dec2 = dm2.decide(data)
        dec3 = dm3.decide(data)

        self._ensemble_results = pd.DataFrame(
            {
                "TOPSIS": dec3.rank_,
                "WeightedSum": dec1.rank_,
                "WeightedProduct": dec2.rank_
            },
            index=df.index.tolist())

        # Only keep features that our decision makers deemed in the top % specified
        num_features_requested = math.ceil(
            len(alternative_names) * self._featurePercentage)
        ranks = dec1.rank_ + dec2.rank_ + dec3.rank_
        argmin_sorted = np.argpartition(ranks, num_features_requested)

        self._kept_features = []
        count = 0
        for i in argmin_sorted:
            self._kept_features.append(alternative_names[i])
            count += 1
            if count >= num_features_requested:
                break

        print("", self._featurePercentage * 100,
              " % -> (" + str(num_features_requested) + ") features kept.")
        print(self._kept_features)

    # Report data with only those features
    return_dict['ensemble_results'] = self._ensemble_results
    return_dict['kept_features'] = self._kept_features

    return return_dict
def f1():
    st.title("Fast Ranking")
    st.markdown("<description> Rapid ranking based on indicator data </description>",
                unsafe_allow_html=True)
    st.sidebar.title("")

    #--------------------------------------------------#
    # Upload data
    #--------------------------------------------------#
    st.markdown("## 1. Upload Your File", unsafe_allow_html=True)
    data_file = st.file_uploader("Supported Format: xlsx", type=["xlsx"])

    # read data
    if data_file is not None:
        inputData = pd.read_excel(data_file, engine="openpyxl")

        # show the head of the data
        is_show_data = st.checkbox("Show content of your file?")
        st.markdown("The top 5 rows of your uploaded data", unsafe_allow_html=True)
        if is_show_data:
            st.write(inputData.head())

        #--------------------------------------------------#
        # Settings
        #--------------------------------------------------#
        st.markdown("## ", unsafe_allow_html=True)
        st.markdown("## 2. Settings", unsafe_allow_html=True)

        numerics = ["int16", "int32", "int64", "float16", "float32", "float64"]
        numDF = inputData.select_dtypes(include=numerics)
        numCols = numDF.columns.tolist()
        # non-numerical columns
        idCols = [col for col in inputData.columns.tolist() if col not in numDF]

        #--------------------------------------------------#
        # Decide each indicator's relationship with the ranking
        #--------------------------------------------------#
        st.markdown("### Select Positive Indicators", unsafe_allow_html=True)
        # select positive indicators
        stPos = st.multiselect("Positive Indicator Columns", numCols)
        unselectedCols = [col for col in numCols if col not in stPos]

        # select negative indicators
        st.markdown("### Select Negative Indicators", unsafe_allow_html=True)
        stNeg = st.multiselect("Negative Indicator Columns", unselectedCols)

        #--------------------------------------------------#
        # collect the information provided by the user
        #--------------------------------------------------#
        criteria = []
        selectedCols = []
        for colName in numCols:
            if colName in stPos:
                criteria.append(max)
                selectedCols.append(colName)
            elif colName in stNeg:
                criteria.append(min)
                selectedCols.append(colName)
            else:
                pass
        print("criteria", criteria)
        print(selectedCols)

        #--------------------------------------------------#
        # Run Ranking Algorithm
        #--------------------------------------------------#
        st.markdown("## ", unsafe_allow_html=True)
        if len(selectedCols) > 0:
            st.markdown("## 3. Run Ranking Algorithm", unsafe_allow_html=True)
            st.markdown("## ", unsafe_allow_html=True)
            sortData = st.radio("Sort results by ranks?", ("Yes", "No"))

            if st.button("Run"):
                # X
                X = inputData[selectedCols]
                # fill NA
                X.fillna(0, inplace=True)

                # scale X
                min_max_scaler = MinMaxScaler()
                X_scaled_values = min_max_scaler.fit_transform(X.values)
                # format as dataframe
                X_scaled = pd.DataFrame(X_scaled_values, columns=X.columns)

                # prepare input data for ranking
                criteria_data = Data(X_scaled, criteria, cnames=X_scaled.columns)

                #------------------------------------#
                # ranking algorithm: WeightedProduct
                #------------------------------------#
                dp = simple.WeightedProduct()
                # run the ranking algorithm
                dec_dp = dp.decide(criteria_data)

                #--------------------------------------------------#
                # Save results
                #--------------------------------------------------#
                # add the ranking result back to the selected data
                X.loc[:, "Rank_using_selected_columns"] = dec_dp.rank_
                # add the id columns back
                outputData = pd.concat(
                    [inputData[idCols].reset_index(drop=True), X], axis=1)

                # ask users whether sorting is needed
                if sortData == "Yes":
                    st.write("Results are sorted by ranks")
                    saveData = outputData.sort_values(by="Rank_using_selected_columns")
                else:
                    saveData = outputData

                #--------------------------------------------------#
                # Download results
                #--------------------------------------------------#
                st.markdown("## ", unsafe_allow_html=True)
                st.markdown("## 4. Download Results", unsafe_allow_html=True)
                st.markdown("The top 5 rows of results", unsafe_allow_html=True)
                st.write(saveData.head())
                st.markdown(get_table_download_link(saveData), unsafe_allow_html=True)
# cities now normalized
input_city_node = list(nodes[cityName])
print("Normalized: ", input_city_node)

# replace each city's vector with its absolute difference from the input city
for name, node in nodes.items():
    nodes[name] = [abs(node[i] - input_city_node[i]) for i in range(len(node))]

x = normalize_weights(weights)
criteria = x[0]
weights = x[1]
print(criteria)
print(weights)

criteria_8 = [
    criteria[0], criteria[1], criteria[1], criteria[1], criteria[1],
    criteria[2], criteria[3], criteria[3]
]
weights_8 = [
    weights[0], weights[1] / 4, weights[1] / 4, weights[1] / 4,
    weights[1] / 4, weights[2], weights[3] / 2, weights[3] / 2
]
# take absolute weights before building the decision data
weights_8 = [abs(weight) for weight in weights_8]

data = Data(list(nodes.values()),
            criteria_8,
            anames=list(nodes.keys()),
            weights=weights_8)
print("normalized weights: " + str(data.weights))
print(data.criteria)

dm = closeness.TOPSIS()
dec = dm.decide(data)
print(dec.e_.ideal)

city_names = list(nodes.keys())
for i in dec.rank_[:10]:
    print(city_names[i])
            4257, 112, 20.17, 5671, 75.06, 3, 18, 27, 408, 66, 29, 6032, 124,
            48.64, 5.88, 49.6, 4, 0]]
mtx

# most criteria are maximized; zeros, Runs_bowl, Average_bowling,
# Economical and Strikerate_bowl are minimized
criteria = [
    MAX, MAX, MAX, MAX, MAX, MAX, MAX, MIN, MAX, MAX, MAX, MIN, MAX, MIN,
    MIN, MIN, MAX, MAX
]
criteria

# we can also use the built-in functions as aliases
data = Data(mtx, [
    max, max, max, max, max, max, max, min, max, max, max, min, max, min,
    min, min, max, max
])

data = Data(mtx,
            criteria,
            anames=[
                "SriLanka", "India", "England", "South Africa", "Australia",
                "NewZealand", "Pakistan", "West Indies", "Bangladesh",
                "Zimbabwe", "Ireland"
            ],
            cnames=[
                "Run", "HS", "Average", "BF", "Strikerate", "Centuries",
                "Fifties", "zeros", "Fours", "Sixes", "Maiden", "Runs_bowl",
                "Wickets", "Average_bowling", "Economical", "Strikerate_bowl",
                "4wickets", "5wickets"
            ])
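# A possible continuation: the snippet above stops after building `data`, so
# this sketch ranks the teams with TOPSIS, assuming the old skcriteria (<1.0)
# API used elsewhere in these snippets.
from skcriteria.madm import closeness

dm = closeness.TOPSIS()
dec = dm.decide(data)
print(dec.rank_)                            # rank position of each team
print(data.anames[dec.best_alternative_])   # best-ranked team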
def get_madm_concensus(Wijk=None, num_optimizers=100, data_shape=(10, 5),
                       batch_size=10, policy=np.average, verbose=False):

    # Get data from the simulation
    if Wijk is None:
        Wijk, move_sequence = get_k_optimizations(num_optimizers=num_optimizers,
                                                  data_shape=data_shape,
                                                  batch_size=batch_size,
                                                  verbose=verbose)

    # Construct the alternative space
    alternatives = {}
    alternative_num = 0
    for i in range(Wijk[:, :, 0].shape[0]):
        for j in range(Wijk[:, :, 0].shape[1]):
            alternatives[alternative_num] = (i, j)
            alternative_num += 1

    # Construct the decision matrix
    DM = np.empty((alternative_num, Wijk.shape[2]))
    for a, loc in alternatives.items():
        for k in range(Wijk.shape[2]):
            DM[a, k] = Wijk[loc[0], loc[1], k]

    # Putting it all together
    alternative_names = [v for k, v in alternatives.items()]
    criterion_names = [k for k in range(Wijk.shape[2])]
    criteria = [MAX for i in criterion_names]
    weights = [1 / len(criterion_names) for i in range(len(criterion_names))]
    df = pd.DataFrame(DM, index=alternative_names, columns=criterion_names)

    if verbose:
        print("Alternatives {}".format(alternative_names))
        print("Criteria {}".format(criterion_names))
        print("Weights {}".format(weights))
        print("Decision Matrix {}".format(df))

    # Execute MADM
    data = Data(df.values,
                criteria,
                weights,
                anames=df.index.tolist(),
                cnames=df.columns)

    # Execute with 3 decision makers
    dm1 = simple.WeightedSum()
    dm2 = simple.WeightedProduct()
    dm3 = closeness.TOPSIS()

    dec1 = dm1.decide(data)
    dec2 = dm2.decide(data)
    dec3 = dm3.decide(data)

    ranks = [dec1.rank_, dec2.rank_, dec3.rank_]
    results = pd.DataFrame({"TOPSIS": dec3.rank_,
                            "WeightedSum": dec1.rank_,
                            "WeightedProduct": dec2.rank_},
                           index=df.index.tolist())
    if verbose:
        print("MADM Results: {}".format(results))

    concensus_results = pd.DataFrame({"ConsensusRank": policy(results, axis=1)},
                                     index=results.index)
    rij = concensus_results.values.reshape(Wijk.shape[0], Wijk.shape[1])
    rij_move_sequence = np.argmin(rij, axis=1)

    return rij, rij_move_sequence


#wijk, _ = get_k_optimizations(data=None, num_optimizers=5, data_shape=(10, 5),
#                              batch_size=5, verbose=True)
#rij, _ = get_madm_concensus(Wijk=wijk, policy=np.average)
#print(rij)
#print(_)