示例#1
0
        def train(self,
                  training_data_t: pd.DataFrame,
                  training_data_f: pd.DataFrame,
                  user_id=None):
            """Fit per-axis marginals and one pairwise copula per secondary axis.

            For every score type in ``util.DEFAULT_SCORE_TYPE_LIST`` a
            ``marginal.Norm`` is fitted on the matching column of
            ``training_data_t`` and the value returned by
            ``util.kl_divergence_between_population_and_users`` is recorded.
            The score type with the largest such value becomes
            ``self.main_axis``. Every other score type gets a
            ``copula.Copula`` built from a two-column matrix holding the CDF
            values of the main axis and of that score type.

            Args:
                training_data_t: Training rows; indexed by score type.
                training_data_f: Not used by this method.
                user_id: Not used by this method.

            Side effects:
                Sets ``self.marg_dict``, ``self.kl_dict`` (per-axis values
                plus ``'sum'``, the total with the main axis's value
                subtracted), ``self.main_axis`` and ``self.copula_dict``.
                Reads ``self.cop``.
            """
            self.copula_dict = {}
            self.marg_dict = {}
            self.kl_dict = {'sum': 0}
            max_kl_value = -1000
            for score_type in util.DEFAULT_SCORE_TYPE_LIST:
                marg = marginal.Norm(training_data_t[score_type])
                self.marg_dict[score_type] = marg
                kl = util.kl_divergence_between_population_and_users(
                    marg, score_type)
                self.kl_dict['sum'] += kl
                self.kl_dict[score_type] = kl
                if kl > max_kl_value:
                    max_kl_value = kl
                    self.main_axis = score_type
            # 'sum' must only cover the secondary axes.
            self.kl_dict['sum'] -= max_kl_value

            # Reuse the marginal fitted above rather than refitting it on the
            # same column (assumes marginal.Norm fitting is deterministic).
            main_marg = self.marg_dict[self.main_axis]
            main_cdf_list = [
                main_marg.cdf(x) for x in training_data_t[self.main_axis]
            ]
            for score_type in util.DEFAULT_SCORE_TYPE_LIST:
                if score_type == self.main_axis:
                    continue
                # Use this axis's own marginal. The loop variable left over
                # from the fitting loop above would be the marginal of the
                # last score type, which is wrong for every other axis.
                sub_marg = self.marg_dict[score_type]
                sub_cdf_list = [
                    sub_marg.cdf(x) for x in training_data_t[score_type]
                ]
                # One row per sample, columns = (main axis, this axis).
                cdf_matrix = np.matrix([main_cdf_list, sub_cdf_list]).T
                self.copula_dict[score_type] = copula.Copula(
                    cdf_matrix, self.cop)
示例#2
0
def adhoc_task():
    """Print the mean log1p KL value per score type for role group 4.

    Loads the twelve ``user<N>_kfolded.json`` files from
    ``measure.InputDir``, fits a ``marginal.Norm`` per user and score type,
    then, for the users in role group 4, averages
    ``np.log1p(kl_divergence_between_population_and_users(...))`` per score
    type and prints one ``score_type mean`` line each.
    """
    # User numbers (1-based) belonging to each role group; only group 4 is
    # evaluated below.
    role_members = {
        1: [7, 12],
        2: [1, 2, 5, 6],
        3: [8, 9, 10],
        4: [3, 4, 11],
    }
    selected_users = role_members[4]

    frames = [
        pd.read_json(measure.InputDir + "/user" + str(number) + "_kfolded.json")
        for number in range(1, 13)
    ]

    fitted_per_user = []
    for frame in frames:
        fitted_per_user.append({
            score_type: marginal.Norm(frame[score_type])
            for score_type in DEFAULT_SCORE_TYPE_LIST
        })

    totals = dict.fromkeys(DEFAULT_SCORE_TYPE_LIST, 0)
    for user_number in selected_users:
        # User numbers are 1-based, list positions 0-based.
        fitted = fitted_per_user[user_number - 1]
        for score_type in DEFAULT_SCORE_TYPE_LIST:
            divergence = kl_divergence_between_population_and_users(
                fitted[score_type], score_type)
            totals[score_type] = totals[score_type] + np.log1p(divergence)

    group_size = len(selected_users)
    for score_type, total in totals.items():
        print(score_type, total / group_size)
示例#3
0
    def train(self, training_data_t: pd.DataFrame,
              training_data_f: pd.DataFrame, user_id):
        """Fit per-axis marginals and build a chain of Gumbel copulas.

        A ``marginal.Norm`` is fitted for every score type in
        ``util.DEFAULT_SCORE_TYPE_LIST`` and the value returned by
        ``util.kl_divergence_between_population_and_users`` is recorded for
        each. Score types whose value exceeds 0.05 are kept and ordered by
        ascending value. If fewer than two survive, the full default list is
        used instead (in its own order). One two-dimensional Gumbel copula is
        then created per axis after the first, with parameter
        ``1 + self.param_a * log1p(kl)`` of that axis.

        Args:
            training_data_t: Training rows; indexed by score type.
            training_data_f: Not used by this method.
            user_id: Not used by this method.

        Side effects:
            Sets ``self.main_axis`` (axis with the largest value),
            ``self.marg_dict``, ``self.score_type_list`` and
            ``self.nested_copula_list``. Prints debug output to stdout.
        """
        marg_dict = {}
        kl_dict = {}
        # Start below any real value so main_axis is always (re)assigned,
        # even when no divergence is strictly positive.
        max_kl_value = float('-inf')
        for score_type in util.DEFAULT_SCORE_TYPE_LIST:
            marg = marginal.Norm(training_data_t[score_type])
            marg_dict[score_type] = marg
            kl = util.kl_divergence_between_population_and_users(
                marg, score_type)
            kl_dict[score_type] = kl
            if kl > max_kl_value:
                max_kl_value = kl
                self.main_axis = score_type

        self.marg_dict = marg_dict
        print(kl_dict.values())
        new_kl_dict = {k: v for k, v in kl_dict.items() if v > 0.05}
        # Ascending by divergence; sorted() is stable, so ties keep the
        # DEFAULT_SCORE_TYPE_LIST order.
        self.score_type_list = sorted(new_kl_dict, key=new_kl_dict.get)
        if len(self.score_type_list) < 2:
            # Zero or one axis cannot form a pair: fall back to every axis
            # and keep the unfiltered kl_dict so the lookups below succeed.
            self.score_type_list = util.DEFAULT_SCORE_TYPE_LIST
        else:
            kl_dict = new_kl_dict
        # TODO: remove debug output
        for score_type in self.score_type_list:
            print(score_type, kl_dict[score_type])
        # One copula per axis after the first, parameterised by that axis's
        # divergence.
        self.nested_copula_list = [
            copula.Copula(np.matrix([]),
                          'gumbel',
                          param=1 + self.param_a * np.log1p(kl_dict[score_type]),
                          dim=2)
            for score_type in self.score_type_list[1:]
        ]
示例#4
0
    def train(self,
              training_data_t: pd.DataFrame,
              training_data_f: pd.DataFrame,
              user_id=None):
        """Fit marginals, per-axis mixture models and three top-level copulas.

        Steps:
          1. Fit a ``marginal.Norm`` for each score type in
             ``util.DEFAULT_SCORE_TYPE_LIST``, record the value returned by
             ``util.kl_divergence_between_population_and_users`` and pick the
             score type with the largest value as ``self.main_axis``.
          2. For every entry of ``self.score_type_list`` other than the main
             axis, cluster the training data on (main axis, that axis) and
             store the list returned by
             ``models.create_weight_and_scoring_model_list``.
          3. For every training row and every such axis, accumulate the
             weighted copula CDF and the weighted marginal CDFs over the
             stored (weight, model) entries.
          4. Unless ``self.indep_copulaed`` is truthy, fit three top-level
             copulas on the matrices of ``cop``, ``cop * sub`` and
             ``cop * main * sub`` values.

        Args:
            training_data_t: Training rows; indexed by score type.
            training_data_f: Not used by this method.
            user_id: Not used by this method.

        Side effects:
            Sets ``self.copula_dict``, ``self.marg_dict``, ``self.kl_dict``,
            ``self.main_axis`` and, when fitted, ``self.top_copula1``,
            ``self.top_copula2`` and ``self.top_copula3``. Reads
            ``self.score_type_list``, ``self.n_clusters``, ``self.marg``,
            ``self.cop`` and ``self.indep_copulaed``. Prints debug output.
        """
        self.copula_dict = {}
        self.marg_dict = {}
        self.kl_dict = {'sum': 0}
        max_kl_value = -1000
        for score_type in util.DEFAULT_SCORE_TYPE_LIST:
            marg = marginal.Norm(training_data_t[score_type])
            self.marg_dict[score_type] = marg
            kl = util.kl_divergence_between_population_and_users(
                marg, score_type)
            print(score_type, kl)
            self.kl_dict['sum'] += kl
            self.kl_dict[score_type] = kl
            if kl > max_kl_value:
                max_kl_value = kl
                self.main_axis = score_type
        print(self.main_axis)
        # 'sum' must only cover the secondary axes.
        self.kl_dict['sum'] -= max_kl_value

        # NOTE(review): self.score_type_list is never assigned in this method
        # (an unused local of the same name was dropped); it is presumably
        # initialised elsewhere -- confirm.
        sub_axes = [
            score_type for score_type in self.score_type_list
            if score_type != self.main_axis
        ]

        for score_type in sub_axes:
            target = [self.main_axis, score_type]
            clust = models.create_cluster(training_data_t, self.n_clusters,
                                          target)
            self.copula_dict[
                score_type] = models.create_weight_and_scoring_model_list(
                    clust, self.marg, self.cop, target, [score_type], [])

        # One list of per-row values for each secondary axis.
        cop_values = [[] for _ in sub_axes]
        cop_sub_values = [[] for _ in sub_axes]
        cop_main_sub_values = [[] for _ in sub_axes]

        for _, row in training_data_t.iterrows():
            for position, score_type in enumerate(sub_axes):
                main_cdf = 0
                sub_cdf = 0
                cop_cdf = 0
                # Weighted sum over the (weight, model) entries for this axis.
                for weight_and_scoring_model in self.copula_dict[score_type]:
                    weight = weight_and_scoring_model[0]
                    score_model = weight_and_scoring_model[1]
                    main_marg_cdf = score_model[self.main_axis].cdf(
                        row[self.main_axis])
                    sub_marg_cdf = score_model[score_type].cdf(
                        row[score_type])
                    main_cdf += weight * main_marg_cdf
                    sub_cdf += weight * sub_marg_cdf
                    cop_cdf += score_model['copula'].cdf(
                        np.matrix([main_marg_cdf, sub_marg_cdf])) * weight

                cop_values[position].append(cop_cdf)
                cop_sub_values[position].append(cop_cdf * sub_cdf)
                cop_main_sub_values[position].append(cop_cdf * main_cdf *
                                                     sub_cdf)

        if not self.indep_copulaed:
            # Transpose so rows are samples and columns are secondary axes.
            cop_mat1 = np.matrix(cop_values).T
            cop_mat2 = np.matrix(cop_sub_values).T
            cop_mat3 = np.matrix(cop_main_sub_values).T
            self.top_copula1 = copula.Copula(cop_mat1, self.cop)
            self.top_copula2 = copula.Copula(cop_mat2, self.cop)
            self.top_copula3 = copula.Copula(cop_mat3, self.cop)