def get_optimal_candidates(self, candidates, criteria):
        names = self.candidate_names.iloc[candidates.index].values

        min_max = []
        weights = []
        columns = []

        for c in criteria:
            if c.maximize:
                min_max.append(MAX)
            else:
                min_max.append(MIN)

            weights.append(c.weight)
            columns.append(c.index)

        self.data_topsis = Data(candidates[columns].to_numpy().tolist(),
                                min_max,
                                weights=weights,
                                anames=names,
                                cnames=columns)
        model = closeness.TOPSIS()
        choice = model.decide(self.data_topsis)
        return [candidates.index[int(choice.best_alternative_)]]  # Return must be a list
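# The criteria objects passed to get_optimal_candidates are assumed to expose
# .index, .weight and .maximize attributes (see the loop above); a minimal,
# hypothetical stand-in and call could look like:
from collections import namedtuple

Criterion = namedtuple("Criterion", ["index", "weight", "maximize"])
criteria = [
    Criterion(index="price", weight=0.6, maximize=False),   # cost criterion
    Criterion(index="rating", weight=0.4, maximize=True),   # benefit criterion
]
# best = selector.get_optimal_candidates(candidates_df, criteria)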
Example #2
def leaderboard():
    s_name = []
    s_roll = []
    name = []
    clarity = []
    Brightness = []
    Pixel = []
    Contrast = []
    Resolution = []
    Vignette = []
    for i in user_data.find():
        s_name.append(i['Name'])
        s_roll.append(i['Roll_No'])
        name.append(i['Image_Name'])
        clarity.append(float(i['clarity']))
        Brightness.append(float(i['Brightness']))
        Pixel.append(float(i['Pixel']))
        Contrast.append(float(i['Contrast']))
        Resolution.append(float(i['Resolution']))
        Vignette.append(float(i['Vignette']))
    df = pd.DataFrame({
        'image_name': name,
        'clarity': clarity,
        'Brightness': Brightness,
        'Pixel': Pixel,
        'Contrast': Contrast,
        'Resolution': Resolution,
        'Vignette': Vignette
    })
    criteria = [MAX, MAX, MAX, MAX, MAX, MIN]  # Vignette is the only cost criterion; order matches the columns below
    ds = np.array(df)
    ds1 = ds[:, 1:]
    data = Data(ds1,
                criteria,
                weights=[1.0 / 6] * 6,  # equal weights
                anames=ds[:, 0],  # image names live in the first column
                cnames=[
                    "clarity", "Brightness", "Pixel", "Contrast",
                    "Resolution", "Vignette"
                ])
    t = closeness.TOPSIS()
    dec = t.decide(data)
    rank = dec.rank_
    y = rank.astype(int)  # np.int was removed in NumPy 1.24
    topsis_score = dec.e_.closeness
    result = np.array([s_name, s_roll, y, topsis_score], dtype=object).T
    final = result[result[:, 2].astype(int).argsort()]  # sort rows by integer rank, not lexicographically
    return render_template("leaderboard.html", result=final)
Example #3
def best_alternative(map_paths, delta_time):
    """计算最好的轨迹"""
    mtx = road_score(map_paths)
    mtx = np.column_stack((mtx, time_score(map_paths, delta_time), mode_score(map_paths)))
    criteria = [sk.MAX, sk.MAX, sk.MAX]
    data = sk.Data(mtx, criteria, cnames=['road', 'time', 'turn'], weights=[.4, .2, .4])
    dm = closeness.TOPSIS()
    dec = dm.decide(data)
    return map_paths[dec.best_alternative_]
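# road_score, time_score and mode_score are not shown; the column_stack above
# implies each returns one value per candidate path. A shape check with
# made-up scores:
import numpy as np
road_s = np.array([0.9, 0.4, 0.7])                # hypothetical per-path road scores
time_s = np.array([0.5, 0.8, 0.6])                # hypothetical per-path time scores
turn_s = np.array([0.7, 0.9, 0.2])                # hypothetical per-path turn scores
mtx = np.column_stack((road_s, time_s, turn_s))   # (3, 3): rows=paths, cols=criteria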
Example #4
def perform_topsis(raw_data, survey_data):
    SITE_ROOT = os.path.dirname(os.path.realpath(__file__))
    df = pd.read_csv(SITE_ROOT + "/data/cleaned_data.csv")
    matrix = []
    ids = []
    # for matrix:
    # overall weather diff avg | crimerate | nightlife score
    for school in raw_data['schools']:
        cur_id = school['id']
        ids.append(cur_id)
        row = df.loc[df['UNITID'] == np.int64(int(cur_id))]
        winter_temp = float(row['WINTER_TAVG'].iloc[0])
        spring_temp = float(row['SPRING_TAVG'].iloc[0])
        summer_temp = float(row['SUMMER_TAVG'].iloc[0])
        fall_temp = float(row['FALL_TAVG'].iloc[0])

        winter_diff = abs(float(survey_data['winter']) - winter_temp)
        spring_diff = abs(float(survey_data['spring']) - spring_temp)
        summer_diff = abs(float(survey_data['summer']) - summer_temp)
        fall_diff = abs(float(survey_data['fall']) - fall_temp)
        diff = (winter_diff + spring_diff + summer_diff + fall_diff) / 4.0

        max_crimerate = float(df['CRIME_COUNT'].max())
        crimerate = float(row['CRIME_COUNT'].iloc[0])

        matrix.append([
            diff, crimerate / max_crimerate,
            get_bar_data(school['lat'], school['lon'])
        ])

    #print ( matrix )
    criteria = [MIN, MIN, MAX]
    data = Data(matrix,
                criteria,
                weights=[
                    float(raw_data['weather']['importance']),
                    float(raw_data['crime']['importance']),
                    float(raw_data['nightlife']['importance'])
                ],
                anames=ids,
                cnames=["weather", "crime", "nightlife"])

    analysis = closeness.TOPSIS()
    res = analysis.decide(data)
    #print( res )
    rank_list = res.rank_.tolist()
    sorted_ids = [None] * len(ids)
    for i, r in enumerate(rank_list):
        sorted_ids[r - 1] = int(ids[i])  # rank_ is 1-based
    #print( sorted_ids )
    return sorted_ids
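# rank_ is 1-based: rank_[i] is the final position of alternative i. A toy
# check of the inversion used above (values illustrative):
ranks = [2, 1, 3]
ids = ["a", "b", "c"]
sorted_ids = [None] * len(ids)
for i, r in enumerate(ranks):
    sorted_ids[r - 1] = ids[i]
assert sorted_ids == ["b", "a", "c"]   # best-ranked id comes first
Example #5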
    def execute(self):

        alternative_names = self._X.columns.tolist()
        criterion_names = list(self._key_features.keys())
        criteria = [MAX for i in criterion_names]
        weights = [
            1.0 / len(criterion_names) for _ in criterion_names
        ]  # equal weights; the original range(n)/n zeroed out the first criterion

        df = pd.DataFrame(self._key_features, index=alternative_names)

        data = Data(df.to_numpy(),
                    criteria,
                    weights,
                    anames=df.index.tolist(),
                    cnames=df.columns)
        #if self._verbose:
        #data.plot("radar");

        dm1 = simple.WeightedSum()
        dm2 = simple.WeightedProduct()
        dm3 = closeness.TOPSIS()
        dec1 = dm1.decide(data)
        dec2 = dm2.decide(data)
        dec3 = dm3.decide(data)

        ranks = [dec1.rank_, dec2.rank_, dec3.rank_]
        self._ensemble_results = pd.DataFrame(
            {
                "TOPSIS": dec3.rank_,
                "WeightedSum": dec1.rank_,
                "WeightedProduct": dec2.rank_
            },
            index=df.index.tolist())

        # Only keep features that our decision makers deemed in the top % specified
        num_features_requested = math.ceil(
            len(alternative_names) * self._featurePercentage)
        sum_ranks = sum(ranks)
        argmin_sorted = np.argpartition(sum_ranks, num_features_requested)
        self._kept_features = []

        count = 0
        for i in argmin_sorted:
            self._kept_features.append(alternative_names[i])
            count += 1
            if count >= num_features_requested:
                break

        if self._verbose:
            print("", self._featurePercentage * 100,
                  " % -> (" + str(num_features_requested) + ") features kept.")
            print(self._kept_features)

        return self._ensemble_results, self._kept_features
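# np.argpartition leaves the indices of the k smallest summed ranks in the
# first k slots without a full sort; a toy check (values illustrative):
import numpy as np
idx = np.argpartition(np.array([7, 1, 5, 3]), 2)[:2]
assert set(idx) == {1, 3}   # positions of the two smallest values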
Example #6
    def calculate(priority, products):
        new_scores = {}
        scores = products['scores']
        product_ids = list(scores.keys())

        if len(product_ids) == 0:
            return

        first_product_id = product_ids[0]
        entity = list(scores[first_product_id].keys())

        criteria = [MAX] * len(entity)
        matrix = Topsis.create_matrix(scores)
        priority = Topsis.reshape_priority(priority)

        data = Data(matrix,
                    criteria,
                    weights=priority,
                    anames=product_ids,
                    cnames=entity)

        dm = closeness.TOPSIS(mnorm="sum")
        decision = dm.decide(data)
        ranks = decision.rank_.tolist()

        for idx in range(len(ranks)):
            pid = product_ids[idx]

            new_scores[pid] = {
                'pid': pid,
                'rank': ranks[idx],
                'attrs': scores[pid]
            }

        products['scores'] = new_scores

        return decision
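# Topsis.create_matrix and Topsis.reshape_priority are not shown in this
# snippet; a minimal sketch of create_matrix consistent with its use above
# (one row per product, columns in `entity` order) could be:
def create_matrix(scores):
    product_ids = list(scores.keys())
    entity = list(scores[product_ids[0]].keys())
    return [[scores[pid][e] for e in entity] for pid in product_ids]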
Example #7
#The index of the row of the best alternative (if this decision is a γ-solution)
print(dec.best_alternative_, data.anames[dec.best_alternative_])

#And the kernel of the non-dominated alternatives (if this decision is a β-solution);
# this returns None because this decision
# is not a β-solution
print(dec.kernel_)

dm = simple.WeightedProduct()
print(dm)

dec = dm.decide(data)
print(dec)

#TOPSIS
dm = closeness.TOPSIS()
print(dm)

dec = dm.decide(data)
print(dec)

#TOPSIS adds more information to the decision object.
print(dec.e_)
print("Ideal:", dec.e_.ideal)
print("Anti-Ideal:", dec.e_.anti_ideal)
print("Closeness:", dec.e_.closeness)

#Finally we can change the normalization of the alternatives matrix to sum (divide every value by the sum of
#its criterion column) and check the result:
dm = closeness.TOPSIS(mnorm="sum")
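# Continuing the flow above, deciding with the sum-normalized model looks the
# same as before (same Data object, same attributes on the decision):
dec = dm.decide(data)
print(dec)
print("Closeness:", dec.e_.closeness)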
Example #8
def main():
    #
    # Read the data
    #
    matrix = []
    names = []
    properties = []
    read_data('data_with_missing_values.csv', matrix, names, properties)

    #
    # complete missing data
    #
    matrix = complete_data(matrix)

    #
    # Prepare the weights
    #

    # properties = ['ScreenSize', 'PrimaryCamera', 'SecondaryCamera', 'RAM', 'Battery', 'Memory', 'SDSlot', 'TalkTime',
    #             'Price', 'Announced', 'VoiceControl', 'SoundSpeaker', 'Weight', 'PhysicalKeyboard']

    regular_users_weights = [
        0.07, 0.07, 0.05, 0.09, 0.11, 0.08, 0.06, 0.09, 0.12, 0.02, 0.05, 0.07,
        0.08, 0.04
    ]  # sum(weights) = 1
    children_weights = [
        0.06, 0.07, 0.05, 0.09, 0.09, 0.07, 0.04, 0.04, 0.25, 0.02, 0.06, 0.04,
        0.09, 0.03
    ]  # sum(weights) = 1
    photographers_weights = [
        0.1, 0.15, 0.12, 0.09, 0.06, 0.09, 0.07, 0.04, 0.09, 0.02, 0.02, 0.04,
        0.07, 0.04
    ]  # sum(weights) = 1
    business_man_weights = [
        0.1, 0.07, 0.06, 0.09, 0.02, 0.09, 0.07, 0.11, 0.1, 0.04, 0.09, 0.05,
        0.02, 0.09
    ]  # sum(weights) = 1
    travelers_weights = [
        0.08, 0.12, 0.1, 0.06, 0.11, 0.08, 0.07, 0.06, 0.01, 0.05, 0.07, 0.09,
        0.07, 0.03
    ]  # sum(weights) = 1

    groups_weights = [
        regular_users_weights, children_weights, photographers_weights,
        business_man_weights, travelers_weights
    ]

    validate_weight_groups(groups_weights)

    #
    # Calculate the balance for the best result; you can mix across all the groups via the weight values
    #

    # balance vector between the groups
    balance_vector = [0, 0, 0, 0, 1]
    validate_balance_vector(balance_vector)

    #
    # compute the balanced weights
    #

    weights = [0.0] * len(regular_users_weights)

    for i in range(len(weights)):
        st = 0
        for j in range(len(balance_vector)):
            st += balance_vector[j] * groups_weights[j][i]
        weights[i] = st

    #
    # criteria
    #

    # criteria for what is good value min or max
    criteria = [1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, -1,
                1]  # -1 -> minimum is best, 1 -> maximum is best

    #
    # run TOPSIS
    #

    rc = cl.TOPSIS()
    rcc = rc.decide(matrix, criteria, weights)
    display_matrix = to_result_object(names, properties, rcc, weights, True)
    print(to_table_string(display_matrix))
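# The balanced-weights loop above is just a matrix-vector product; with numpy
# (and made-up 5x2 group vectors) it collapses to one line:
import numpy as np
balance = np.array([0, 0, 0, 0, 1])            # same idea as balance_vector
groups = np.array([[0.5, 0.5], [0.2, 0.8],
                   [0.7, 0.3], [0.4, 0.6],
                   [0.1, 0.9]])
weights = balance @ groups                     # -> array([0.1, 0.9])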
Example #9
    def loop(self):
        """Periodic job."""
        if self.__mcda_descriptor is not None and self.__active:

            # Step 1: creating structure to handle all metrics
            self.create_mcda_structure()

            # Step 2: Update WTP/LVAP association map
            self.update_wtp_association_map()

            # Step 3: for each criterion, get all metrics and populate structure
            for crr_criteria in self.__mcda_descriptor['criteria']:
                if crr_criteria == 'wtp_load_measured_mbps':
                    if not self.get_wtp_load_measurements():
                        return
                elif crr_criteria == 'wtp_queue_delay_ms':
                    if not self.get_wtp_queue_delay_measurements():
                        return
                elif crr_criteria == 'wtp_channel_load_rate':
                    if not self.get_wtp_channel_load_measurements():
                        return
                elif crr_criteria == 'wtp_sta_rssi_dbm':
                    if not self.get_lvap_rssi_measurements():
                        return
                elif crr_criteria == 'wtp_load_expected_mbps':
                    self.initialize_wtp_load_expected()
                elif crr_criteria == 'sta_association_flag':
                    self.get_sta_association_flag()

            # Step 4: get all flows from flow manager APP
            if self.get_flow_handler():
                if self.__flow_handler['flows'] is not None:

                    # Step 5: Compute WTP expected load if present in the criteria
                    if 'wtp_load_expected_mbps' in self.__mcda_descriptor['criteria']:
                        self.compute_wtp_load_expected_mbps()

                    # Step 6: for each lvap in the network, get a decision using the TOPSIS method
                    for lvap in self.lvaps():
                        crr_lvap_addr = str(lvap.addr)

                        # Create MCDA structure
                        mtx = []
                        wtp_addresses = []
                        for crr_wtp_addr in self.__mcda_handover_manager['wtps']:
                            if crr_lvap_addr in self.__mcda_handover_manager['wtps'][crr_wtp_addr]['lvaps']:
                                # Keep anames aligned with the rows appended to mtx
                                wtp_addresses.append(crr_wtp_addr)
                                mtx.append(
                                    self.__mcda_handover_manager['wtps'][crr_wtp_addr]['lvaps'][crr_lvap_addr]['metrics'][
                                        'values'])

                        # if any of the (active) flows in this LVAP is QoS, use QoS weights
                        # otherwise, stick with the BE weights
                        mcda_weights = self.__mcda_descriptor['weights_be']
                        if crr_lvap_addr in self.__flow_handler['lvap_flow_map']:
                            if any(i in self.__flow_handler['lvap_flow_map'][crr_lvap_addr] for i in self.__flow_handler['qos_flows']):
                                mcda_weights = self.__mcda_descriptor['weights_qos']


                        # Lists must have the same length
                        data = Data(mtx,
                                    self.__mcda_targets,
                                    weights=mcda_weights,
                                    anames=wtp_addresses,
                                    cnames=self.__mcda_descriptor['criteria'])

                        dm = closeness.TOPSIS()
                        dec = dm.decide(data)
                        best_alternative_wtp_addr = data.anames[dec.best_alternative_]

                        if self.__db_monitor:
                            # These are per-decision, not per-row; hoisted out of the loop
                            closeness_list = dec.e_.closeness.tolist()
                            ranks = dec.rank_.tolist()
                            for i in range(0, len(mtx)):
                                closeness_res = closeness_list[i]
                                if math.isnan(closeness_res):
                                    closeness_res = None

                                fields = ['LVAP_ADDR', 'WTP_ADDR'] + \
                                         self.__mcda_descriptor['criteria'] + \
                                         ['RANK', 'CLOSENESS']

                                values = [crr_lvap_addr, wtp_addresses[i]] + mtx[i] + [ranks[i], closeness_res]
                                # Saving into db
                                self.monitor.insert_into_db(table='mcda_results', fields=fields, values=values)

                            # # TODO: Improve writing info...
                            # f = open(self.__mcda_results_filename, 'w+')
                            # f.write('Decision for LVAP: ' + crr_lvap_addr + '\n' + str(
                            #     dec) + '\nMove to WTP: ' + best_alternative_wtp_addr + '\n')
                            # f.close()

                        # Step 7: is handover needed? Do it and set the flag to 0 for all other blocks
                        # (this could be improved, but get-block-with-given-address should be implemented)
                        if 'sta_association_flag' in self.__mcda_descriptor['criteria']:
                            sta_association_index = self.__mcda_descriptor['criteria'].index('sta_association_flag')
                        old_wtp_addr = None
                        for block in self.blocks():
                            crr_wtp_addr = str(block.addr)
                            if lvap.blocks[0] is not None:
                                if crr_wtp_addr == best_alternative_wtp_addr:
                                    # Do handover to this block only if the station is not connected to it
                                    sta_crr_wtp_addr = str(lvap.blocks[0].addr)
                                    if sta_crr_wtp_addr != best_alternative_wtp_addr:
                                        self.log.info("Handover triggered!")
                                        old_wtp_addr = sta_crr_wtp_addr
                                        # Handover now..
                                        lvap.blocks = block
                                    # and update metrics
                                    if 'sta_association_flag' in self.__mcda_descriptor['criteria']:
                                        self.__mcda_handover_manager['wtps'][crr_wtp_addr]['lvaps'][crr_lvap_addr]['metrics'][
                                            'values'][sta_association_index] = 1
                                elif 'sta_association_flag' in self.__mcda_descriptor['criteria']:
                                    self.__mcda_handover_manager['wtps'][crr_wtp_addr]['lvaps'][crr_lvap_addr]['metrics'][
                                        'values'][sta_association_index] = 0

                        # Recalculate WTP expected load on handover, if any...
                        # OBS: not possible to access lvap.blocks[0] while performing handover
                        if 'wtp_load_expected_mbps' in self.__mcda_descriptor['criteria']:
                            if old_wtp_addr is not None:
                                self.recalculate_wtp_load_expected_mbps(old_wtp_addr=old_wtp_addr,
                                                                        best_alternative_wtp_addr=best_alternative_wtp_addr,
                                                                        moving_lvap_addr=crr_lvap_addr)

            # Start considering association and expected load from now on...
            if self.__initial_association:
                self.__initial_association = False

            if self.__db_monitor is not None:
                fields = self.__mcda_descriptor['criteria'] + ['TYPE']
                values = self.__mcda_descriptor['weights_qos'] + ['QoS']

                # Saving into db
                self.monitor.insert_into_db(table='mcda_weights', fields=fields, values=values)

                fields = self.__mcda_descriptor['criteria'] + ['TYPE']
                values = self.__mcda_descriptor['weights_be'] + ['BE']

                # Saving into db
                self.monitor.insert_into_db(table='mcda_weights', fields=fields, values=values)

                # Keeping only the last measurements in db
                self.monitor.keep_last_measurements_only('mcda_association_stats')
                self.monitor.keep_last_measurements_only('mcda_results')
                self.monitor.keep_last_measurements_only('mcda_weights')
Example #11
File: app.py Project: audrynyonata/MCRS
def calculate(id):
    # fetch & preprocess
    project = mongo.db.projects.find_one_or_404({"id": id})
    projectCharacteristics = {p["id"]: p for p in project["characteristics"]}

    cnames = [*projectCharacteristics]

    # fetch & preprocess characteristics
    cursor = mongo.db.characteristics.find({"id": {"$in": cnames}})
    characteristics = {d["id"]: d for d in cursor}

    # fetch & preprocess methodchunks
    cursor = mongo.db.methodchunks.find(
        {"characteristics.id": {
            "$in": cnames
        }})
    method_chunks = {}
    for document in cursor:
        document["characteristics"] = {
            d["id"]: d
            for d in document["characteristics"]
        }
        method_chunks[document["id"]] = document

    # create encoder
    from sklearn.preprocessing import OrdinalEncoder
    for cid, pc in projectCharacteristics.items():
        if (pc["rule"] == "preference_list"):
            pass
        elif (pc["rule"] == "exact"):
            pass
        else:  #maximum, minimum
            if (cid in characteristics):
                for cv in characteristics[cid]["characteristicValues"]:
                    if (cv["ref"] == pc["ref"]):
                        if (pc["rule"] == "maximum"):
                            pc["value"] = list(reversed(cv["values"]))
                        else:
                            pc["value"] = cv["values"]
                        break
        values = pc["value"] + ["N/A"]
        values.reverse()  # ordinal values asc order (smallest to largest)
        enc = OrdinalEncoder(categories=[values])
        enc.fit([[v] for v in values])
        pc["encoder"] = enc

    # build mtx
    import pandas as pd
    df = pd.DataFrame([], columns=cnames)
    for mid, m in method_chunks.items():
        obj = {}
        for cid, pc in projectCharacteristics.items():
            if (cid in m["characteristics"]):
                if (pc["ref"] == m["characteristics"][cid]["ref"]):
                    obj[cid] = m["characteristics"][cid]["value"]
        df = pd.concat([df, pd.Series(obj, index=df.columns, name=mid).to_frame().T])
    df.fillna("N/A", inplace=True)
    print(df)
    separator()

    if len(df.index.values) == 0:
        return "No match"
    # apply encoding
    encoded = df.copy()
    for key, value in encoded.items():
        values = [
            v if v in projectCharacteristics[key]["value"] else "N/A"
            for v in value
        ]
        encoded.loc[:, key] = projectCharacteristics[key]["encoder"].transform(
            [[v] for v in values])
    encoded = encoded.loc[:, (encoded != 0).any(axis=0)]
    # print(encoded)
    # separator()

    # construct
    from skcriteria import Data, MAX
    from skcriteria.madm import simple, closeness
    optimal_senses = []
    weights = []
    for cid, pc in encoded.items():
        optimal_senses.append(MAX)
        weights.append(projectCharacteristics[cid].get("weight", 1))

    data = Data(encoded.values,
                optimal_senses,
                weights=weights,
                anames=encoded.index,
                cnames=encoded.columns)
    #print(data)
    #separator()

    # WeightedSum
    model = simple.WeightedSum(mnorm="vector", wnorm="sum")
    de = model.decide(data)
    print(de)
    separator()

    print(de.e_)
    print("Points:", de.e_.points)

    # TOPSIS
    model2 = closeness.TOPSIS(mnorm="vector", wnorm="sum")
    de2 = model2.decide(data)
    print(de2)
    separator()

    print(de2.e_)
    print("Ideal:", de2.e_.ideal)
    print("Anti-Ideal:", de2.e_.anti_ideal)
    print("Closeness:", de2.e_.closeness)

    # build response
    res = {}
    for cid, mc in method_chunks.items():
        mc["characteristics"] = [c for cid, c in mc["characteristics"].items()]
    z = [{
        "methodChunk": method_chunks[de._data._anames[i]],
        "score": de.e_.points[i],
        "rank": int(de._rank[i])
    } for i in range(0, len(de.mtx))]
    z2 = [{
        "methodChunk": method_chunks[de2._data._anames[i]],
        "score": de2.e_.closeness[i],
        "rank": int(de2._rank[i])
    } for i in range(0, len(de2.mtx))]

    res["results"] = [{
        "model": "WeightedSum",
        "values": sorted(z, key=lambda x: x["rank"])
    }, {
        "model": "TOPSIS",
        "values": sorted(z2, key=lambda x: x["rank"])
    }]

    for cid, pc in projectCharacteristics.items():
        pc.pop("_id", None)
        pc["encoder"] = pc["encoder"].categories[0]
    project["characteristics"] = [
        pc for cid, pc in projectCharacteristics.items()
    ]
    project.pop("_id", None)
    res["project"] = project

    # print(res)
    # tes = {}
    # tes["project"] = projectCharacteristics
    # tes["method_chunks"] = method_chunks
    # tes["characteristics"] = characteristics

    default = lambda o: f"<<non-serializable: {type(o).__qualname__}>>"
    result = json.loads(json_util.dumps(res, default=default))
    return result
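# The OrdinalEncoder built above maps "N/A" to 0 and more-preferred values to
# larger ordinals, so every criterion can be treated as MAX. A quick
# illustration with made-up categories:
from sklearn.preprocessing import OrdinalEncoder
enc = OrdinalEncoder(categories=[["N/A", "low", "high"]])
enc.fit([["N/A"], ["low"], ["high"]])
print(enc.transform([["high"], ["N/A"]]))   # [[2.], [0.]]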
Example #12
    def flow(self,
             models_to_flow=[],
             params=None,
             test_size=0.2,
             nfolds=3,
             nrepeats=3,
             n_jobs=1,
             metrics=[],
             verbose=False,
             regressors=True,
             ensemble=False,
             featurePercentage=0.25):

        # Enforce parameters
        assert isinstance(nfolds, int), "nfolds must be integer"
        assert isinstance(nrepeats, int), "nrepeats must be integer"
        assert isinstance(n_jobs, int), "n_jobs must be integer"
        assert isinstance(verbose, bool), "verbose must be bool"
        assert isinstance(params, dict), "params must be a dict"
        assert isinstance(test_size, float), "test_size must be a float"
        assert isinstance(metrics, list), "model scoring must be a list"
        assert isinstance(regressors, bool), "regressor must be bool"
        assert isinstance(ensemble, bool), "ensemble must be bool"

        # Enforce logic for regressors
        #if regressors:
        #  assert(not any(["c" in k.split("__") for k,v in params.items()]), "You selected classifiers with the regressors flag true. Comon\' man!")
        #else:
        #  assert(not any(["r" in k.split("__") for k,v in params.items()]), "You selected regressors with the regressors flag false. Comon\' man!")

        self._nfolds = nfolds
        self._nrepeats = nrepeats
        self._n_jobs = n_jobs
        self._verbose = verbose
        self._allParams = params
        self._metrics = metrics
        self._test_size = test_size
        self._regressors = regressors
        self._ensemble = ensemble
        self._featurePercentage = featurePercentage

        # Inform the streamline to user.
        stringbuilder = ""
        for thing in models_to_flow:
            stringbuilder += thing
            stringbuilder += " --> "

        if self._verbose:

            if self._regressors:
                print("*************************")
                print("=> (Regressor) " + "=> Feature Selection Streamline: " +
                      stringbuilder[:-5])
                print("*************************")
            elif self._regressors == False:
                print("*************************")
                print("=> (Classifier) " +
                      "=> Feature Selection Streamline: " + stringbuilder[:-5])
                print("*************************")
            else:
                print(
                    "Invalid model selected. Please set regressors=True or regressors=False."
                )
                print()

        def supportVectorRegression():
            self._svr_params = {}
            for k, v in self._allParams.items():
                if "svr" in k:
                    self._svr_params[k] = v

            self._svr_params["svr__kernel"] = ['linear']
            model = SupportVectorRegressorPredictiveModel(
                self._X_train, self._y_train, self._svr_params, self._nfolds,
                self._n_jobs, self._verbose)
            return abs(model.getBestEstimator().coef_.flatten())

        def randomForestRegression():
            self._rfr_params = {}
            for k, v in self._allParams.items():
                if "rfr" in k:
                    self._rfr_params[k] = v

            model = RandomForestRegressorPredictiveModel(
                self._X_train, self._y_train, self._rfr_params, self._nfolds,
                self._n_jobs, self._verbose)
            return abs(model.getBestEstimator().feature_importances_.flatten())

        def adaptiveBoostingRegression():
            self._abr_params = {}
            for k, v in self._allParams.items():
                if "abr" in k:
                    self._abr_params[k] = v

            model = AdaptiveBoostingRegressorPredictiveModel(
                self._X_train, self._y_train, self._abr_params, self._nfolds,
                self._n_jobs, self._verbose)
            return abs(model.getBestEstimator().feature_importances_.flatten())

        def lassoRegression():
            self._lasso_params = {}
            for k, v in self._allParams.items():
                if "lasso" in k:
                    self._lasso_params[k] = v

            model = LassoRegressorPredictiveModel(self._X_train, self._y_train,
                                                  self._lasso_params,
                                                  self._nfolds, self._n_jobs,
                                                  self._verbose)
            return abs(model.getBestEstimator().coef_.flatten())

        def elasticNetRegression():
            self._enet_params = {}
            for k, v in self._allParams.items():
                if "enet" in k:
                    self._enet_params[k] = v

            model = ElasticNetRegressorPredictiveModel(
                self._X_train, self._y_train, self._enet_params, self._nfolds,
                self._n_jobs, self._verbose)
            return abs(model.getBestEstimator().coef_.flatten())

        def mixed_selection():

            if self._verbose:
                print("Executing: mixed_selection")

            X = self._X
            y = self._y

            initial_list = []
            threshold_in_specified = False
            threshold_out_specified = False

            if "mixed_selection__threshold_in" in self._allParams.keys():
                assert isinstance(
                    self._allParams["mixed_selection__threshold_in"],
                    float), "threshold_in must be a float"
                threshold_in = self._allParams["mixed_selection__threshold_in"]
                threshold_in_specified = True
            else:
                threshold_in = 0.01

            if "mixed_selection__threshold_out" in self._allParams.keys():
                assert isinstance(
                    self._allParams["mixed_selection__threshold_out"],
                    float), "threshold_out must be a float"
                threshold_out = self._allParams[
                    "mixed_selection__threshold_out"]
                threshold_out_specified = True
            else:
                threshold_out = 0.05

            if "mixed_selection__verbose" in self._allParams.keys():
                assert isinstance(self._allParams["mixed_selection__verbose"],
                                  bool), "verbose must be a bool"
                verbose = self._allParams["mixed_selection__verbose"]
            else:
                verbose = False

            if threshold_in_specified and threshold_out_specified:
                assert threshold_in < threshold_out, \
                    "threshold_in must be strictly less than threshold_out to avoid infinite looping."

            #initial_list = self._initial_list
            #threshold_in = self._threshold_in
            #threshold_out = self._threshold_out
            #verbse = self._verbose
            """ Perform a forward-backward feature selection 
            based on p-value from statsmodels.api.OLS
            Arguments:
                X - pandas.DataFrame with candidate features
                y - list-like with the target
                initial_list - list of features to start with (column names of X)
                threshold_in - include a feature if its p-value < threshold_in
                threshold_out - exclude a feature if its p-value > threshold_out
                verbose - whether to print the sequence of inclusions and exclusions
            Returns: list of selected features 
            Always set threshold_in < threshold_out to avoid infinite looping.
            See https://en.wikipedia.org/wiki/Stepwise_regression for the details
            """

            included = list(initial_list)
            while True:
                changed = False

                # forward step
                excluded = list(set(X.columns) - set(included))
                new_pval = pd.Series(index=excluded, dtype=float)

                for new_column in excluded:

                    model = sm.OLS(
                        y,
                        sm.add_constant(
                            pd.DataFrame(X[included + [new_column]]))).fit()
                    new_pval[new_column] = model.pvalues[new_column]

                best_pval = new_pval.min()

                if best_pval < threshold_in:
                    best_feature = new_pval.idxmin()
                    #best_feature = new_pval.argmin()
                    included.append(best_feature)
                    changed = True
                    if verbose:
                        print('Adding  {:30} with p-value {:.6}'.format(
                            best_feature, best_pval))

                # backward step
                model = sm.OLS(y, sm.add_constant(pd.DataFrame(
                    X[included]))).fit()
                # use all coefs except intercept
                pvalues = model.pvalues.iloc[1:]
                worst_pval = pvalues.max()  # null if pvalues is empty
                if worst_pval > threshold_out:
                    changed = True
                    worst_feature = pvalues.idxmax()
                    #worst_feature = pvalues.argmax()
                    included.remove(worst_feature)
                    if verbose:
                        print('Dropping {:30} with p-value {:.6}'.format(
                            worst_feature, worst_pval))

                if not changed:
                    break

            new_included = []
            for col in X.columns:
                if col in included:
                    new_included.append(1)
                else:
                    new_included.append(0)

            return new_included

        def partialLeastSquaresRegression():

            if self._verbose:
                print("Executing: plsr")
            # The components are not helpful for this context. They might be for transformation, however.
            #if "plsr__n_components" in self._allParams.keys():
            #  n_components = self._allParams["plsr__n_components"]
            #else:
            #  n_components = 2
            pls_model = PLSRegression()
            pls_out = pls_model.fit(self._X, self._y)

            # The coefficients are used to show direction of the relationship
            return abs(pls_out.coef_.flatten())

        ############################################
        ########## Classifiers Start Here ##########
        ############################################

        def adaptiveBoostingClassifier():
            self._abc_params = {}
            for k, v in self._allParams.items():
                if "abc" in k:
                    self._abc_params[k] = v

            model = AdaptiveBoostingClassifierPredictiveModel(
                self._X_train, self._y_train, self._abc_params, self._nfolds,
                self._n_jobs, self._verbose)
            return model.getBestEstimator().feature_importances_.flatten()

        def randomForestClassifier():
            self._rfc_params = {}
            for k, v in self._allParams.items():
                if "rfc" in k:
                    self._rfc_params[k] = v

            model = RandomForestClassifierPredictiveModel(
                self._X_train, self._y_train, self._rfc_params, self._nfolds,
                self._n_jobs, self._verbose)
            return model.getBestEstimator().feature_importances_.flatten()

        def supportVectorClassifier():
            self._svc_params = {}
            for k, v in self._allParams.items():
                if "svc" in k:
                    self._svc_params[k] = v

            self._svc_params["svc__kernel"] = ['linear']
            model = SupportVectorClassifierPredictiveModel(
                self._X_train, self._y_train, self._svc_params, self._nfolds,
                self._n_jobs, self._verbose)

            coefs = model.getBestEstimator().coef_
            prods = coefs[0, :]
            for i in range(1, len(coefs)):
                prods = np.multiply(prods, coefs[i, :])
            return abs(prods)

        # Valid regressors
        regression_options = {
            "mixed_selection": mixed_selection,
            "svr": supportVectorRegression,
            "rfr": randomForestRegression,
            "abr": adaptiveBoostingRegression,
            "lasso": lassoRegression,
            "enet": elasticNetRegression,
            "plsr": partialLeastSquaresRegression
        }

        # Valid classifiers
        classification_options = {
            'abc': adaptiveBoostingClassifier,
            'rfc': randomForestClassifier,
            'svc': supportVectorClassifier
        }

        # Define return dictionary
        return_dict = {}

        # Train test split
        self._X_train, self._X_test, self._y_train, self._y_test = train_test_split(
            self._X, self._y, test_size=self._test_size)

        # Wrapper models
        self._key_features = {}

        if self._regressors:
            for key in models_to_flow:
                self._key_features[key] = regression_options[key]()
        elif self._regressors == False:
            for key in models_to_flow:
                self._key_features[key] = classification_options[key]()
        else:
            print(
                "Invalid model type. Please set regressors=True or regressors=False."
            )
            print()
        if self._verbose:
            print()

        return_dict['feature_importances'] = self._key_features

        self._ensemble_results = None
        self._kept_features = None
        if self._ensemble:

            alternative_names = self._X.columns.tolist()
            criterion_names = list(self._key_features.keys())
            criteria = [MAX for i in criterion_names]
            weights = [
                1.0 / len(criterion_names) for _ in criterion_names
            ]  # equal weights; the original range(n)/n zeroed out the first criterion

            df = pd.DataFrame(self._key_features, index=alternative_names)

            data = Data(df.to_numpy(),
                        criteria,
                        weights,
                        anames=df.index.tolist(),
                        cnames=df.columns)
            #if self._verbose:
            #data.plot("radar");

            dm1 = simple.WeightedSum()
            dm2 = simple.WeightedProduct()
            dm3 = closeness.TOPSIS()
            dec1 = dm1.decide(data)
            dec2 = dm2.decide(data)
            dec3 = dm3.decide(data)

            self._ensemble_results = pd.DataFrame(
                {
                    "TOPSIS": dec3.rank_,
                    "WeightedSum": dec1.rank_,
                    "WeightedProduct": dec2.rank_
                },
                index=df.index.tolist())

            # Only keep features that our decision makers deemed in the top % specified
            num_features_requested = math.ceil(
                len(alternative_names) * self._featurePercentage)
            ranks = dec1.rank_ + dec2.rank_ + dec3.rank_
            argmin_sorted = np.argpartition(ranks, num_features_requested)
            self._kept_features = []

            count = 0
            for i in argmin_sorted:
                self._kept_features.append(alternative_names[i])
                count += 1
                if count >= num_features_requested:
                    break

            print("", self._featurePercentage * 100,
                  " % -> (" + str(num_features_requested) + ") features kept.")
            print(self._kept_features)
            # Print data with only those features
            return_dict['ensemble_results'] = self._ensemble_results
            return_dict['kept_features'] = self._kept_features

        return return_dict
Example #13
def get_madm_consensus(Wijk=None, num_optimizers=100, data_shape=(10,5), batch_size=10, policy=np.average, verbose=False):
        
    # Get data from simulation
    if Wijk is None:
        Wijk, move_sequence = get_k_optimizations(num_optimizers=num_optimizers,
                                                  data_shape=data_shape,
                                                  batch_size=batch_size, 
                                                  verbose=verbose)
    
    # Construct alternative-space
    alternatives={}
    alternative_num=0
    for i in range(Wijk[:,:,0].shape[0]):
        for j in range(Wijk[:,:,0].shape[1]):
            alternatives[alternative_num]=(i,j)
            alternative_num+=1
    #print(alternatives)
    
    # Construct decision-matrix
    DM=np.empty((alternative_num,Wijk.shape[2]))
    for a,loc in alternatives.items():
        for k in range(Wijk.shape[2]):
            DM[a,k]=Wijk[loc[0],loc[1],k]
    #print(DM)
    
    # Putting it all together
    alternative_names = [v for k,v in alternatives.items()]
    criterion_names = [k for k in range(Wijk.shape[2])]
    criteria = [MAX for i in criterion_names]
    weights = [1/len(criterion_names) for i in range(len(criterion_names))]
    df = pd.DataFrame(DM,
                      index=alternative_names,
                      columns=criterion_names)
     
    if verbose:
        print("Alternatives {}".format(alternative_names))
        print("Criteria {}".format(criterion_names))
        print("Weights {}".format(weights))
        print("Decision Matrix {}".format(df))
    
    
    # Execute MADM
    data = Data(df.to_numpy(),
                criteria,
                weights,
                anames=df.index.tolist(),
                cnames=df.columns
                )
    
    # Execute on 3 decision makers
    dm1 = simple.WeightedSum()
    dm2 = simple.WeightedProduct()
    dm3 = closeness.TOPSIS()
    dec1 = dm1.decide(data)
    dec2 = dm2.decide(data)
    dec3 = dm3.decide(data)
    
    ranks=[dec1.rank_, dec2.rank_,dec3.rank_]
    results = pd.DataFrame({"TOPSIS":dec3.rank_,
                            "WeightedSum":dec1.rank_,
                            "WeightedProduct":dec2.rank_},
                            index=df.index.tolist())
    
    if verbose:
        print("MADM Results: {}".format(results))
    consensus_results = pd.DataFrame({"ConsensusRank": policy(results, axis=1)}, index=results.index)
    rij = consensus_results.to_numpy().reshape(Wijk.shape[0], Wijk.shape[1])
    rij_move_sequence=np.argmin(rij,axis=1)
    #if verbose:
    #    print("rij {}".format(rij))
    #    print("rij_move_sequence {}".format(rij_move_sequence))
    return rij, rij_move_sequence

#wijk,_=get_k_optimizations(data=None, num_optimizers=5, data_shape=(10,5), batch_size=5, verbose=True)

#rij, _ = get_madm_consensus(Wijk=wijk, policy=np.average)
#print(rij)
#print(_)