Exemplo n.º 1
0
    def execute(self):
        """Rank the features with three MCDM methods and keep the best share.

        Every column of ``self._X`` is treated as an alternative and every
        entry of ``self._key_features`` as a criterion to maximise.  The
        alternatives are ranked with WeightedSum, WeightedProduct and TOPSIS;
        the ``self._featurePercentage`` share of features with the lowest
        summed rank is kept.

        Side effects:
            Sets ``self._ensemble_results`` and ``self._kept_features``.

        Returns:
            tuple: ``(ensemble_results, kept_features)`` where
            ``ensemble_results`` is a DataFrame holding one rank column per
            decision maker (indexed by feature name) and ``kept_features`` is
            the list of retained feature names, best summed rank first.
        """
        alternative_names = self._X.columns.tolist()
        criterion_names = list(self._key_features.keys())
        criteria = [MAX for _ in criterion_names]
        # Every criterion gets the same weight.  The previous
        # ``i / n for i in range(n)`` gave the first criterion a weight of 0
        # (and an all-zero weight vector when only one criterion was present).
        weights = [1.0 / len(criterion_names) for _ in criterion_names]

        df = pd.DataFrame(self._key_features, index=alternative_names)

        # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` is
        # available in every pandas version.
        data = Data(df.values,
                    criteria,
                    weights,
                    anames=df.index.tolist(),
                    cnames=df.columns)

        dec1 = simple.WeightedSum().decide(data)
        dec2 = simple.WeightedProduct().decide(data)
        dec3 = closeness.TOPSIS().decide(data)

        self._ensemble_results = pd.DataFrame(
            {
                "TOPSIS": dec3.rank_,
                "WeightedSum": dec1.rank_,
                "WeightedProduct": dec2.rank_
            },
            index=df.index.tolist())

        # Only keep features that our decision makers deemed in the top % specified
        num_features_requested = int(
            math.ceil(len(alternative_names) * self._featurePercentage))
        sum_ranks = dec1.rank_ + dec2.rank_ + dec3.rank_
        # A stable argsort followed by a slice is valid for any requested
        # count (``np.argpartition`` raises when kth == len) and yields a
        # deterministic best-first order.
        best_first = np.argsort(sum_ranks, kind="mergesort")
        self._kept_features = [
            alternative_names[i] for i in best_first[:num_features_requested]
        ]

        if self._verbose:
            print("", self._featurePercentage * 100,
                  " % -> (" + str(num_features_requested) + ") features kept.")
            print(self._kept_features)

        return self._ensemble_results, self._kept_features
Exemplo n.º 2
0
def rank_label(df_lab,  wgt):
  """Rank the rows of *df_lab* with four simple MCDM configurations.

  Columns 1..4 (by position) are the criteria, all of them minimised; the
  'Index' column supplies the alternative names and *wgt* the criterion
  weights.  One rank column is appended per (method, normalisation) pair.

  Returns a copy of *df_lab* with the four rank columns added, sorted by the
  weighted-sum / sum-normalisation rank.
  """
  criteria_data = Data(
      df_lab.iloc[:, 1:5],
      [MIN] * 4,
      anames=df_lab['Index'],
      cnames=df_lab.columns[1:5],
      weights=wgt,
  )

  ranked = df_lab.copy()

  # (output column, decision-maker class, matrix normalisation)
  configurations = (
      ('rank_weightedSum_sumNorm_inverse', simple.WeightedSum, "sum"),
      ('rank_weightedSum_maxNorm_inverse', simple.WeightedSum, "max"),
      ('rank_weightedProduct_sumNorm_inverse', simple.WeightedProduct, "sum"),
      ('rank_weightedProduct_maxNorm_inverse', simple.WeightedProduct, "max"),
  )
  for column, maker_cls, norm_name in configurations:
    decision = maker_cls(mnorm=norm_name).decide(criteria_data)
    ranked.loc[:, column] = decision.rank_

  # sort for better visualization
  ranked.sort_values(by=['rank_weightedSum_sumNorm_inverse'], inplace=True)
  return ranked
Exemplo n.º 3
0
# Inspect a decision object and then re-run the decision with two other
# decision makers.
# NOTE(review): `dec` and `data` are created earlier in the script (not
# visible in this fragment).
print("Generate a kernel of best alternatives?", dec.beta_solution_)
print("Choose the best alternative?", dec.gamma_solution_)

# The rank as a numpy array (available if this decision is an alpha-solution)

print(dec.rank_)

# Row index of the best alternative (available if this decision is a
# gamma-solution), followed by that alternative's name
print(dec.best_alternative_, data.anames[dec.best_alternative_])

# The kernel of the non-dominated alternatives (available if this decision is
# a beta-solution).
# This prints None here because this decision is not a beta-solution.
print(dec.kernel_)

# Weighted product
dm = simple.WeightedProduct()
print(dm)

dec = dm.decide(data)
print(dec)

# TOPSIS
dm = closeness.TOPSIS()
print(dm)

dec = dm.decide(data)
print(dec)

# TOPSIS stores extra information on the decision object under `e_`.
print(dec.e_)
print("Ideal:", dec.e_.ideal)
Exemplo n.º 4
0
def f1():
    """Render the "Fast Ranking" Streamlit page.

    Workflow shown to the user:
      1. upload an .xlsx file,
      2. pick which numeric columns are positive (maximise) and which are
         negative (minimise) indicators,
      3. on "Run": fill missing values with 0, min-max scale the selected
         columns and rank the rows with a weighted product,
      4. display the first rows of the result and a download link.

    The function returns nothing; everything is emitted through Streamlit.
    """
    st.title("Fast Ranking")
    st.markdown("<description> Rapid ranking based on indicator data </description>",
                unsafe_allow_html = True)
    st.sidebar.title("")
    #--------------------------------------------------#
    # Upload data
    #--------------------------------------------------#
    st.markdown("## 1. Upload Your File",
                unsafe_allow_html = True)
    data_file = st.file_uploader("Supported Format: xlsx", type=["xlsx"])

    # Nothing below can be rendered until a file has been provided.
    if data_file is None:
        return

    # read data
    inputData = pd.read_excel(data_file, engine = "openpyxl")

    # Optional preview; the caption is only shown together with the table
    # (it used to be displayed even when the checkbox was unticked).
    is_show_data = st.checkbox("Show content of your file?")
    if is_show_data:
        st.markdown("The top 5 rows of your uploaded data",
                    unsafe_allow_html = True)
        st.write(inputData.head())

    #--------------------------------------------------#
    # Settings
    #--------------------------------------------------#
    st.markdown("## ",
                unsafe_allow_html = True)
    st.markdown("## 2. Settings",
                unsafe_allow_html = True)
    numerics = ["int16", "int32", "int64", "float16", "float32", "float64"]
    numDF = inputData.select_dtypes(include = numerics)
    numCols = numDF.columns.tolist()

    # non-numerical columns are carried through as identifiers
    idCols = [col for col in inputData.columns.tolist() if col not in numDF]

    #--------------------------------------------------#
    # Decide indicator relationship with the ranking
    #--------------------------------------------------#
    st.markdown("### Select Positive Indicators",
                unsafe_allow_html = True)
    stPos = st.multiselect("Positive Indicator Columns", numCols)

    # a column cannot be both positive and negative
    unselectedCols = [col for col in numCols if col not in stPos]
    st.markdown("### Select Negative Indicators",
                unsafe_allow_html = True)
    stNeg = st.multiselect("Negative Indicator Columns", unselectedCols)

    #--------------------------------------------------#
    # collect information provided by user
    #--------------------------------------------------#
    # `criteria` and `selectedCols` are built in the column order of the
    # file so they stay aligned with each other.
    criteria = []
    selectedCols = []
    for colName in numCols:
        if colName in stPos:
            criteria.append(max)
        elif colName in stNeg:
            criteria.append(min)
        else:
            continue
        selectedCols.append(colName)
    # Debug output, printed once instead of once per numeric column.
    print("criteria", criteria)
    print(selectedCols)

    #--------------------------------------------------#
    # Run Ranking Algorithm
    #--------------------------------------------------#
    st.markdown("## ",
                unsafe_allow_html = True)
    if not selectedCols:
        return

    st.markdown("## 3. Run Ranking Algorithm",unsafe_allow_html = True)
    st.markdown("## ",unsafe_allow_html = True)
    sortData = st.radio("Sort results by ranks?",("Yes", "No"))
    if not st.button("Run"):
        return

    # Work on an independent frame: ``fillna`` returns a new object, so the
    # later column assignment never touches a slice of `inputData`
    # (the in-place version triggered chained-assignment warnings).
    X = inputData[selectedCols].fillna(0)

    # scaling X
    min_max_scaler = MinMaxScaler()
    X_scaled_values = min_max_scaler.fit_transform(X.values)

    # format as dataframe
    X_scaled = pd.DataFrame(X_scaled_values, columns=X.columns)

    # prepare input data for ranking
    criteria_data = Data(X_scaled,
                         criteria,
                         cnames = X_scaled.columns)

    #------------------------------------#
    # ranking algorithm: WeightedProduct
    #------------------------------------#
    dp = simple.WeightedProduct()
    dec_dp = dp.decide(criteria_data)

    #--------------------------------------------------#
    # Save results
    #--------------------------------------------------#
    # add ranking result back to the (unscaled) selected data
    X["Rank_using_selected_columns"] = dec_dp.rank_

    # add id cols back
    outputData = pd.concat([inputData[idCols].reset_index(drop=True), X], axis =1)

    # sort only if the user asked for it
    if sortData == "Yes":
        st.write("Results are sorted by ranks")
        saveData = outputData.sort_values(by ="Rank_using_selected_columns")
    else:
        saveData = outputData

    #--------------------------------------------------#
    # Download results
    #--------------------------------------------------#
    st.markdown("## ",
                unsafe_allow_html = True)
    st.markdown("## 4. Download Results",
                unsafe_allow_html = True)
    st.markdown("The top 5 rows of results",
                unsafe_allow_html = True)
    st.write(saveData.head())
    st.markdown(get_table_download_link(saveData),
                unsafe_allow_html=True)
Exemplo n.º 5
0
                xticklabels=attribute_names,
                fmt='.2g')


###########
# Print the final ranking table produced by several multi-criteria decision
# makers.
# NOTE(review): `criteria_data` is built earlier in the script (not visible in
# this fragment).

# Weighted sum
dm = simple.WeightedSum()
dec = dm.decide(criteria_data)
print(dec)
print(dec.e_.points)  # points stored by the method (presumably the per-alternative score behind the ranks; confirm)
print(dec.rank_)  # resulting ranks

print("==============================")

# Weighted product
dm = simple.WeightedProduct()
dec = dm.decide(criteria_data)
print(dec)
print(dec.e_.points)  # points stored by the method (presumably the per-alternative score behind the ranks; confirm)
print(dec.rank_)  # resulting ranks

print("==============================")

# TOPSIS: exposes ideal / anti-ideal / closeness under `e_`
dm = closeness.TOPSIS()
dec = dm.decide(criteria_data)
print(dec)
print("Ideal:", dec.e_.ideal)  # the `ideal` attribute stored by TOPSIS
print("Anti-Ideal:", dec.e_.anti_ideal)
print("Closeness:", dec.e_.closeness)

######################
Exemplo n.º 6
0
    def flow(self,
             models_to_flow=None,
             params=None,
             test_size=0.2,
             nfolds=3,
             nrepeats=3,
             n_jobs=1,
             metrics=None,
             verbose=False,
             regressors=True,
             ensemble=False,
             featurePercentage=0.25):
        """Run the requested feature-selection models and optionally ensemble them.

        Each key in *models_to_flow* is looked up in the regressor table
        (``mixed_selection``, ``svr``, ``rfr``, ``abr``, ``lasso``, ``enet``,
        ``plsr``) or the classifier table (``abc``, ``rfc``, ``svc``),
        depending on *regressors*, and produces one importance value per
        column of ``self._X``.  With ``ensemble=True`` the importances are
        combined with WeightedSum, WeightedProduct and TOPSIS and only the
        best *featurePercentage* share of features is kept.

        Args:
            models_to_flow: list of model keys to run, in order.  ``None``
                (the default) means an empty list.
            params: dict of hyper-parameters.  An entry is handed to a model
                when its key contains that model's tag (e.g. ``"svr"``);
                ``mixed_selection__threshold_in``,
                ``mixed_selection__threshold_out`` and
                ``mixed_selection__verbose`` configure the stepwise selection.
                ``None`` (the default) means an empty dict.
            test_size: float forwarded to ``train_test_split``.
            nfolds: int forwarded to the wrapped predictive models.
            nrepeats: int, only stored on the instance by this method.
            n_jobs: int forwarded to the wrapped predictive models.
            metrics: list, only stored on the instance by this method.
                ``None`` (the default) means an empty list.
            verbose: bool, print progress information.
            regressors: bool, ``True`` to use the regressor table, ``False``
                for the classifier table.
            ensemble: bool, run the MCDM ensemble on the importances.
            featurePercentage: share of features kept by the ensemble.

        Returns:
            dict with ``'feature_importances'`` (model key -> importances)
            and, when *ensemble* is true, ``'ensemble_results'`` (DataFrame
            of ranks per decision maker) and ``'kept_features'`` (list of
            feature names, best summed rank first).

        Raises:
            AssertionError: if an argument has the wrong type or the
                stepwise thresholds are invalid.
            KeyError: if a key in *models_to_flow* is not in the selected
                table.
        """
        # ``None`` stands in for "empty" so no mutable default object is
        # shared between calls (``metrics`` is stored on the instance).
        models_to_flow = [] if models_to_flow is None else models_to_flow
        metrics = [] if metrics is None else metrics
        # The advertised default for ``params`` used to be rejected by the
        # assertion below; treat it as "no hyper-parameters".
        params = {} if params is None else params

        # Enforce parameters
        assert isinstance(nfolds, int), "nfolds must be integer"
        assert isinstance(nrepeats, int), "nrepeats must be integer"
        assert isinstance(n_jobs, int), "n_jobs must be integer"
        assert isinstance(verbose, bool), "verbose must be bool"
        assert isinstance(params, dict), "params must be a dict"
        assert isinstance(test_size, float), "test_size must be a float"
        assert isinstance(metrics, list), "model scoring must be a list"
        assert isinstance(regressors, bool), "regressor must be bool"
        assert isinstance(ensemble, bool), "ensemble must be bool"

        self._nfolds = nfolds
        self._nrepeats = nrepeats
        self._n_jobs = n_jobs
        self._verbose = verbose
        self._allParams = params
        self._metrics = metrics
        self._test_size = test_size
        self._regressors = regressors
        self._ensemble = ensemble
        self._featurePercentage = featurePercentage

        # Inform the streamline to user.
        streamline = " --> ".join(models_to_flow)

        if self._verbose:

            if self._regressors:
                print("*************************")
                print("=> (Regressor) " + "=> Feature Selection Streamline: " +
                      streamline)
                print("*************************")
            elif self._regressors == False:
                print("*************************")
                print("=> (Classifier) " +
                      "=> Feature Selection Streamline: " + streamline)
                print("*************************")
            else:
                # Only reachable when assertions are disabled (python -O).
                print(
                    "Invalid model selected. Please set regressors=True or regressors=False."
                )
                print()

        def params_matching(tag):
            """Return the user params whose key contains *tag*."""
            return {k: v for k, v in self._allParams.items() if tag in k}

        def supportVectorRegression():
            """Absolute coefficients of the best linear-kernel SVR."""
            self._svr_params = params_matching("svr")
            # The importances are read from ``coef_`` below, so the kernel is
            # forced to linear regardless of what the caller supplied.
            self._svr_params["svr__kernel"] = ['linear']
            model = SupportVectorRegressorPredictiveModel(
                self._X_train, self._y_train, self._svr_params, self._nfolds,
                self._n_jobs, self._verbose)
            return abs(model.getBestEstimator().coef_.flatten())

        def randomForestRegression():
            """Absolute feature importances of the best random-forest regressor."""
            self._rfr_params = params_matching("rfr")
            model = RandomForestRegressorPredictiveModel(
                self._X_train, self._y_train, self._rfr_params, self._nfolds,
                self._n_jobs, self._verbose)
            return abs(model.getBestEstimator().feature_importances_.flatten())

        def adaptiveBoostingRegression():
            """Absolute feature importances of the best AdaBoost regressor."""
            self._abr_params = params_matching("abr")
            model = AdaptiveBoostingRegressorPredictiveModel(
                self._X_train, self._y_train, self._abr_params, self._nfolds,
                self._n_jobs, self._verbose)
            return abs(model.getBestEstimator().feature_importances_.flatten())

        def lassoRegression():
            """Absolute coefficients of the best lasso regressor."""
            self._lasso_params = params_matching("lasso")
            model = LassoRegressorPredictiveModel(self._X_train, self._y_train,
                                                  self._lasso_params,
                                                  self._nfolds, self._n_jobs,
                                                  self._verbose)
            return abs(model.getBestEstimator().coef_.flatten())

        def elasticNetRegression():
            """Absolute coefficients of the best elastic-net regressor."""
            self._enet_params = params_matching("enet")
            model = ElasticNetRegressorPredictiveModel(
                self._X_train, self._y_train, self._enet_params, self._nfolds,
                self._n_jobs, self._verbose)
            return abs(model.getBestEstimator().coef_.flatten())

        def mixed_selection():
            """Forward-backward feature selection on OLS p-values.

            Uses ``self._X`` (candidate features) and ``self._y`` (target).
            A feature is added when its p-value is below ``threshold_in``
            and dropped when its p-value exceeds ``threshold_out``.
            ``threshold_in`` must be strictly less than ``threshold_out``
            to avoid infinite looping.
            See https://en.wikipedia.org/wiki/Stepwise_regression for details.

            Returns:
                list with one entry per column of ``self._X``: 1 if the
                column was selected, 0 otherwise.
            """
            if self._verbose:
                print("Executing: mixed_selection")

            X = self._X
            y = self._y

            # The defaults satisfy the type checks, so validating after the
            # lookup is equivalent to validating only user-supplied values.
            # (The former ``assert (cond, "msg")`` asserted a non-empty tuple
            # and therefore could never fail.)
            threshold_in = self._allParams.get(
                "mixed_selection__threshold_in", 0.01)
            assert isinstance(threshold_in,
                              float), "threshold_in must be a float"

            threshold_out = self._allParams.get(
                "mixed_selection__threshold_out", 0.05)
            assert isinstance(threshold_out,
                              float), "threshold_out must be a float"

            verbose = self._allParams.get("mixed_selection__verbose", False)
            assert isinstance(verbose, bool), "verbose must be a bool"

            # Checked even when only one threshold is user-supplied: a custom
            # value combined with the other default can loop forever too.
            assert threshold_in < threshold_out, (
                "threshold in must be strictly less than the threshold out to avoid infinite looping."
            )

            included = []
            while True:
                changed = False

                # forward step: try every feature that is not yet included
                # (kept in column order so ties resolve deterministically)
                excluded = [col for col in X.columns if col not in included]
                # Explicit float dtype: an empty Series without dtype is
                # object-typed on current pandas, which breaks idxmin().
                new_pval = pd.Series(index=excluded, dtype=float)

                for new_column in excluded:
                    model = sm.OLS(
                        y,
                        sm.add_constant(
                            pd.DataFrame(X[included + [new_column]]))).fit()
                    new_pval[new_column] = model.pvalues[new_column]

                best_pval = new_pval.min()  # NaN when nothing is excluded

                if best_pval < threshold_in:
                    best_feature = new_pval.idxmin()
                    included.append(best_feature)
                    changed = True
                    if verbose:
                        print('Adding  {:30} with p-value {:.6}'.format(
                            best_feature, best_pval))

                # backward step
                model = sm.OLS(y, sm.add_constant(pd.DataFrame(
                    X[included]))).fit()
                # use all coefs except intercept
                pvalues = model.pvalues.iloc[1:]
                worst_pval = pvalues.max()  # null if pvalues is empty
                if worst_pval > threshold_out:
                    changed = True
                    worst_feature = pvalues.idxmax()
                    included.remove(worst_feature)
                    if verbose:
                        print('Dropping {:30} with p-value {:.6}'.format(
                            worst_feature, worst_pval))

                if not changed:
                    break

            return [1 if col in included else 0 for col in X.columns]

        def partialLeastSquaresRegression():
            """Absolute PLS regression coefficients fitted on the full data."""
            if self._verbose:
                print("Executing: plsr")
            # The components are not helpful for this context. They might be
            # for transformation, however.
            pls_model = PLSRegression()
            pls_out = pls_model.fit(self._X, self._y)

            # The coefficients are used to show direction of the relationship
            return abs(pls_out.coef_.flatten())

        ############################################
        ########## Classifiers Start Here ##########
        ############################################

        def adaptiveBoostingClassifier():
            """Feature importances of the best AdaBoost classifier."""
            self._abc_params = params_matching("abc")
            model = AdaptiveBoostingClassifierPredictiveModel(
                self._X_train, self._y_train, self._abc_params, self._nfolds,
                self._n_jobs, self._verbose)
            return model.getBestEstimator().feature_importances_.flatten()

        def randomForestClassifier():
            """Feature importances of the best random-forest classifier."""
            self._rfc_params = params_matching("rfc")
            model = RandomForestClassifierPredictiveModel(
                self._X_train, self._y_train, self._rfc_params, self._nfolds,
                self._n_jobs, self._verbose)
            return model.getBestEstimator().feature_importances_.flatten()

        def supportVectorClassifier():
            """Absolute product, across coefficient rows, of the linear SVC weights."""
            self._svc_params = params_matching("svc")
            # The importances are read from ``coef_`` below, so the kernel is
            # forced to linear regardless of what the caller supplied.
            self._svc_params["svc__kernel"] = ['linear']
            model = SupportVectorClassifierPredictiveModel(
                self._X_train, self._y_train, self._svc_params, self._nfolds,
                self._n_jobs, self._verbose)

            # Multiply the coefficient rows element-wise into one vector.
            coefs = model.getBestEstimator().coef_
            prods = coefs[0, :]
            for i in range(1, len(coefs)):
                prods = np.multiply(prods, coefs[i, :])
            return abs(prods)

        # Valid regressors
        regression_options = {
            "mixed_selection": mixed_selection,
            "svr": supportVectorRegression,
            "rfr": randomForestRegression,
            "abr": adaptiveBoostingRegression,
            "lasso": lassoRegression,
            "enet": elasticNetRegression,
            "plsr": partialLeastSquaresRegression
        }

        # Valid classifiers
        classification_options = {
            'abc': adaptiveBoostingClassifier,
            'rfc': randomForestClassifier,
            'svc': supportVectorClassifier
        }

        # Define return dictionary
        return_dict = {}

        # Train test split
        self._X_train, self._X_test, self._y_train, self._y_test = train_test_split(
            self._X, self._y, test_size=self._test_size)

        # Wrapper models
        self._key_features = {}

        if self._regressors:
            for key in models_to_flow:
                self._key_features[key] = regression_options[key]()
        elif self._regressors == False:
            for key in models_to_flow:
                self._key_features[key] = classification_options[key]()
        else:
            # Only reachable when assertions are disabled (python -O).
            print(
                "Invalid model type. Please set regressors=True or regressors=False."
            )
            print()
        if self._verbose:
            # A bare ``print`` is a no-op expression in Python 3; call it so
            # the blank separator line is actually written.
            print()

        return_dict['feature_importances'] = self._key_features

        self._ensemble_results = None
        self._kept_features = None
        if self._ensemble:

            alternative_names = self._X.columns.tolist()
            criterion_names = list(self._key_features.keys())
            criteria = [MAX for _ in criterion_names]
            # Every model gets the same weight.  The previous
            # ``i / n for i in range(n)`` gave the first model a weight of 0
            # (and an all-zero weight vector for a single model).
            weights = [1.0 / len(criterion_names) for _ in criterion_names]

            df = pd.DataFrame(self._key_features, index=alternative_names)

            # ``DataFrame.as_matrix()`` was removed in pandas 1.0.
            data = Data(df.values,
                        criteria,
                        weights,
                        anames=df.index.tolist(),
                        cnames=df.columns)

            dec1 = simple.WeightedSum().decide(data)
            dec2 = simple.WeightedProduct().decide(data)
            dec3 = closeness.TOPSIS().decide(data)

            self._ensemble_results = pd.DataFrame(
                {
                    "TOPSIS": dec3.rank_,
                    "WeightedSum": dec1.rank_,
                    "WeightedProduct": dec2.rank_
                },
                index=df.index.tolist())

            # Only keep features that our decision makers deemed in the top % specified
            num_features_requested = int(
                math.ceil(len(alternative_names) * self._featurePercentage))
            ranks = dec1.rank_ + dec2.rank_ + dec3.rank_
            # A stable argsort followed by a slice is valid for any requested
            # count (``np.argpartition`` raises when kth == len) and yields a
            # deterministic best-first order.
            best_first = np.argsort(ranks, kind="mergesort")
            self._kept_features = [
                alternative_names[i]
                for i in best_first[:num_features_requested]
            ]

            print("", self._featurePercentage * 100,
                  " % -> (" + str(num_features_requested) + ") features kept.")
            print(self._kept_features)
            # Print data with only those features
            return_dict['ensemble_results'] = self._ensemble_results
            return_dict['kept_features'] = self._kept_features

        return return_dict
Exemplo n.º 7
0
def get_madm_concensus(Wijk=None, num_optimizers=100, data_shape=(10,5), batch_size=10, policy=np.average, verbose=False):
    """Build a consensus rank matrix from K optimisation results via MADM.

    Every cell ``(i, j)`` of the first two axes of *Wijk* is an alternative
    and every slice along the third axis is a criterion to maximise (equal
    weights).  The alternatives are ranked with WeightedSum, WeightedProduct
    and TOPSIS, and *policy* reduces the three ranks of each alternative to a
    single consensus value.

    Args:
        Wijk: 3-D numpy array.  When ``None`` it is obtained from
            ``get_k_optimizations`` using *num_optimizers*, *data_shape*,
            *batch_size* and *verbose*.
        num_optimizers, data_shape, batch_size: forwarded to
            ``get_k_optimizations`` when *Wijk* is ``None``.
        policy: callable invoked as ``policy(results, axis=1)`` on the
            DataFrame of ranks; defaults to ``np.average``.
        verbose: print the intermediate inputs and results.

    Returns:
        tuple: ``(rij, rij_move_sequence)`` where ``rij`` is the consensus
        rank reshaped to ``(Wijk.shape[0], Wijk.shape[1])`` and
        ``rij_move_sequence`` holds, per row, the column index of the lowest
        consensus rank.
    """
    # Get data from simulation
    if Wijk is None:
        Wijk, _ = get_k_optimizations(num_optimizers=num_optimizers,
                                      data_shape=data_shape,
                                      batch_size=batch_size,
                                      verbose=verbose)

    n_rows, n_cols, n_criteria = Wijk.shape

    # Alternative-space: one alternative per (i, j) cell, row-major order.
    alternative_names = [(i, j) for i in range(n_rows) for j in range(n_cols)]

    # Decision matrix: row a = i * n_cols + j holds Wijk[i, j, :], which is
    # exactly a row-major reshape of the first two axes.
    DM = np.asarray(Wijk, dtype=float).reshape(n_rows * n_cols, n_criteria)

    # Putting it all together
    criterion_names = list(range(n_criteria))
    criteria = [MAX for _ in criterion_names]
    weights = [1.0 / n_criteria for _ in criterion_names]
    df = pd.DataFrame(DM,
                      index=alternative_names,
                      columns=criterion_names)

    if verbose:
        print("Alternatives {}".format(alternative_names))
        print("Criteria {}".format(criterion_names))
        print("Weights {}".format(weights))
        print("Decision Matrix {}".format(df))

    # Execute MADM (``DataFrame.as_matrix()`` was removed in pandas 1.0,
    # ``.values`` works on every pandas version).
    data = Data(df.values,
                criteria,
                weights,
                anames=df.index.tolist(),
                cnames=df.columns
                )

    # Execute on 3 decision makers
    dec1 = simple.WeightedSum().decide(data)
    dec2 = simple.WeightedProduct().decide(data)
    dec3 = closeness.TOPSIS().decide(data)

    results = pd.DataFrame({"TOPSIS":dec3.rank_,
                            "WeightedSum":dec1.rank_,
                            "WeightedProduct":dec2.rank_},
                            index=df.index.tolist())

    if verbose:
        print("MADM Results: {}".format(results))

    concensus_results = pd.DataFrame({"ConsensusRank": policy(results, axis=1)},
                                     index=results.index)
    rij = concensus_results.values.reshape(n_rows, n_cols)
    # Lowest consensus rank per row.
    rij_move_sequence = np.argmin(rij, axis=1)
    return rij, rij_move_sequence

#wijk,_=get_k_optimizations(data=None, num_optimizers=5, data_shape=(10,5), batch_size=5, verbose=True)

#rij, _=get_madm_concensus(Wijk=wijk, policy=np.average )
#print(rij)
#print(_)