def calc_client_prop(self,
                     show_progress=False,
                     user_info_koeff=0,
                     squared=True,
                     set_already_bough_prop=False):
        """Fill ``self.client_prop`` with a score per customer/product pair.

        For every customer ``x`` and product ``y`` the score is computed as
        ``P(x|y) * P(y) / P(x)`` where

        * ``P(y)`` is ``self.occ[y]``,
        * ``P(x)`` averages, over all customers, the number of products they
          share with ``x`` divided by the squared size of ``x``'s purchase
          list,
        * ``P(x|y)`` is the same kind of sum restricted to the customers that
          bought ``y`` (still divided by ``self.n_Kunden``).

        Args:
            show_progress: If true, report progress through
                ``load.print_progress`` once per customer.
            user_info_koeff: Weight of the user-info similarity term. ``0``
                disables it; otherwise ``get_info()`` is called and each
                buyer's contribution is blended with its similarity to the
                scored customer.
            squared: If true, ``P(x|y)`` is normalised by the squared length
                of the purchase list, otherwise by the plain length.
            set_already_bough_prop: If truthy, this value is stored directly
                for products the customer already bought instead of a
                computed score.

        Note:
            A customer without any purchase has a zero normaliser, so the
            whole row of that customer ends up as NaN (NumPy division by
            zero). ``self.occ`` must already be set.
        """
        client_prop = np.zeros((self.n_Kunden, self.n_Produkte))
        print("Calculate Propabilities:", "." * 100)

        if user_info_koeff != 0:
            user_info = get_info()
            # 5 is the number of one-hot encoded features.
            # NOTE(review): presumably user_info is a 2-D array with one row
            # per customer -- confirm against get_info().
            user_info_matrix = user_info.dot(user_info.T) / 5

        # Row indices of the customers that bought each product. This does
        # not depend on the customer being scored, so it is computed once
        # instead of once per customer/product pair.
        buyers_of = [
            np.argwhere(self.KPM[:, produkt_index] == 1)[:, 0]
            for produkt_index in range(self.n_Produkte)
        ]

        for kunden_index in range(self.n_Kunden):

            if show_progress:
                load.print_progress(kunden_index / self.n_Kunden,
                                    "Calculate Propabilities")

            kunden_vektor = self.KPM[kunden_index]
            kunden_buy_list = np.argwhere(kunden_vektor == 1)[:, 0]
            n_bought = len(kunden_buy_list)

            # overlap[k]: how many of this customer's products customer k
            # bought as well. Every quantity below is derived from it.
            overlap = np.sum(self.KPM[:, kunden_buy_list], axis=1)

            # P(x) only depends on the customer, not on the product, so it
            # is evaluated once per customer.
            P_x = np.sum(overlap / n_bought**2) / self.n_Kunden
            normaliser = n_bought**2 if squared else n_bought

            for produkt_index in range(self.n_Produkte):
                if set_already_bough_prop and kunden_vektor[produkt_index] == 1:
                    client_prop[kunden_index,
                                produkt_index] = set_already_bough_prop
                    continue

                buyers = buyers_of[produkt_index]
                P_y = self.occ[produkt_index]
                P_x_if_y = overlap[buyers] / normaliser

                if user_info_koeff != 0:
                    # Blend each buyer's contribution with the similarity
                    # between that buyer and the scored customer.
                    P_x_if_y = (
                        (1 - user_info_koeff) * P_x_if_y +
                        user_info_koeff * P_x_if_y *
                        user_info_matrix[kunden_index, buyers])

                P_x_if_y = np.sum(P_x_if_y) / self.n_Kunden
                client_prop[kunden_index,
                            produkt_index] = P_x_if_y * P_y / P_x

        self.client_prop = client_prop
    def export_as_csv_in_tableau_format(self, pred_set, predictions):
        """Write ``predictions`` as a long-format CSV, one row per cell.

        Each (client, content) cell of ``predictions`` becomes one row with
        the columns ``client``, ``content``, ``propability`` and
        ``already_bought``. When ``self.split == "clients"`` the client id
        and the bought flag come from ``self.client_split_dict[pred_set]``;
        otherwise the row number is the client id and the flag comes from
        ``self.KPM_dict[pred_set]``.

        The file is written to ``Tableau_exports/<title>.csv`` with ``;`` as
        separator; the title is assembled from the run settings stored on
        ``self``.

        Args:
            pred_set: Key of the data set the predictions belong to.
            predictions: 2-D array indexed as ``[client, content]``.
        """
        n_rows, n_cols = predictions.shape
        by_clients = self.split == "clients"

        clients = []
        contents = []
        probabilities = []
        bought_flags = []

        for row in range(n_rows):
            if self.show_progress:
                load.print_progress(row / n_rows, "export")
            for col in range(n_cols):
                if by_clients:
                    split_data = self.client_split_dict[pred_set]
                    client_id = split_data["indexes"][row]
                    was_bought = split_data["KPM"][row, col]
                else:
                    client_id = row
                    was_bought = self.KPM_dict[pred_set][row, col]
                clients.append(client_id)
                bought_flags.append(was_bought)
                contents.append(col)
                probabilities.append(predictions[row, col])

        columns = {
            "client": clients,
            "content": contents,
            "propability": probabilities,
            "already_bought": bought_flags,
        }

        # str.join keeps the original requirement that every piece that is
        # not explicitly converted is already a string.
        title = "".join((
            self.dataset,
            "_predictions_",
            "fit",
            self.fit_set,
            "_pred",
            pred_set,
            "_",
            self.model_type,
            "_approach",
            str(self.approach),
            "_split",
            self.split,
            "_info",
            str(self.use_user_info),
            self.info_string,
        ))
        frame = pd.DataFrame(columns)
        frame.to_csv("Tableau_exports/" + title + ".csv",
                     index_label="Row_index",
                     sep=";")
 def train_batches(self, model, n_batches):
     """Fit ``model`` incrementally on batch files stored on disk.

     For every batch number below ``n_batches`` the data and target arrays
     are loaded from ``<self.dataset>/batches/`` and passed to
     ``model.partial_fit`` together with the full list of class labels
     ``0 .. self.n_Produkte - 1``.

     Args:
         model: Estimator exposing ``partial_fit(X, y, classes=...)``.
         n_batches: Number of batch files to consume, starting at 0.

     Returns:
         The same ``model`` object after all batches were applied.
     """
     for step in range(n_batches):
         if self.show_progress:
             load.print_progress(step / n_batches, "train_batches")

         file_suffix = str(step) + ".npy"
         features = np.load(self.dataset + "/batches/data_batch_no_" +
                            file_suffix)
         labels = np.load(self.dataset + "/batches/target_batch_no_" +
                          file_suffix)

         # Every call receives all labels because a single batch need not
         # contain each product.
         all_labels = np.arange(self.n_Produkte)
         model.partial_fit(features, labels, classes=all_labels)
     return model
    def get_occ_matricies(self, show_progress=False):
        """Compute product frequencies and pairwise conditional frequencies.

        ``occurence[p]`` is the column sum of ``self.KPM[:, p]`` divided by
        ``self.n_Kunden``. ``if_occurence[a, b]`` is ``P(a | b)``: the joint
        frequency of ``a`` and ``b`` divided by the frequency of ``b``, or 0
        when ``b`` never occurs.

        Both results are stored as ``self.if_occ`` / ``self.occ`` and also
        returned.

        Args:
            show_progress: If true, report progress through
                ``load.print_progress`` once per product.

        Returns:
            Tuple ``(if_occurence, occurence)``.
        """
        n_products = self.n_Produkte
        conditional = np.zeros((n_products, n_products))
        marginal = np.zeros(n_products)
        print("get_occ_matricies:", "." * 100)

        for first in range(n_products):
            marginal[first] = sum(self.KPM[:, first]) / self.n_Kunden
            if show_progress:
                load.print_progress(first / self.n_Produkte,
                                    "get_occ_matricies")

            p_first = marginal[first]
            # Only the lower triangle is visited; each step fills the
            # mirrored entry too.
            for second in range(first + 1):
                p_second = marginal[second]
                joint = self.KPM[:, first].dot(
                    self.KPM[:, second]) / self.n_Kunden

                # conditional[first, second] = P(first | second)
                if p_second == 0:
                    conditional[first, second] = 0
                else:
                    conditional[first, second] = joint / p_second

                # conditional[second, first] = P(second | first)
                if p_first == 0:
                    conditional[second, first] = 0
                else:
                    conditional[second, first] = joint / p_first

        self.if_occ = conditional
        self.occ = marginal
        return conditional, marginal
    def predict_set_approach_1(self, set="test"):
        """Predict, per product, the probability that each customer buys it.

        The feature matrix is the purchase matrix of the chosen data set,
        optionally extended by the user-info columns. For product ``i`` the
        ``i``-th model in ``self.model_list`` receives that matrix with
        column ``i`` removed, and the probability of class index 1 is stored.

        Args:
            set: Key of the data set to predict for.

        Returns:
            Float array of shape ``(n_customers, self.n_Produkte)``.
        """
        by_clients = self.split == "clients"

        if by_clients:
            kunden_vektor = self.client_split_dict[set]["KPM"]
        else:
            kunden_vektor = self.KPM_dict[set]

        if self.use_user_info:
            if by_clients:
                user_info = self.client_split_dict[set]["info"]
            else:
                user_info = self.info_dict[set]
            kunden_vektor = np.hstack((kunden_vektor, user_info))

        # Allocate explicitly instead of np.zeros_like(kunden_vektor): the
        # result must hold floats even for an integer purchase matrix, and
        # it has one column per product, not per feature (the user-info
        # columns are inputs only).
        prediction = np.zeros((len(kunden_vektor), self.n_Produkte),
                              dtype=float)

        for index in range(self.n_Produkte):
            if self.show_progress:
                load.print_progress(index / self.n_Produkte, "Prediction")

            features = np.delete(kunden_vektor, index, axis=1)
            # Column 1 is read as the "bought" class; this assumes every
            # model reports at least two classes.
            prediction[:, index] = self.model_list[index].predict_proba(
                features)[:, 1]
        return prediction
    def make_model_approach_1(self, set="full"):
        """Train one binary naive-Bayes model per product.

        For product ``p`` the target is column ``p`` of the purchase matrix
        and the features are all remaining columns, optionally followed by
        the user-info columns. The fitted models are stored in
        ``self.model_list`` in product order.

        Args:
            set: Key of the data set to train on.

        Raises:
            ValueError: If ``self.model_type`` is not one of
                ``"multinomial"``, ``"bernoulli"``, ``"complement"`` or
                ``"gaussian"``.
        """
        supported = ("multinomial", "bernoulli", "complement", "gaussian")
        # Fail early with a clear message; otherwise `model` would simply be
        # unbound inside the loop below.
        if self.model_type not in supported:
            raise ValueError(
                "unknown model_type {!r}; expected one of {}".format(
                    self.model_type, ", ".join(supported)))

        by_clients = self.split == "clients"

        if self.use_user_info:
            if by_clients:
                user_info = self.client_split_dict[set]["info"]
            else:
                user_info = self.info_dict[set]

        if by_clients:
            KPM = self.client_split_dict[set]["KPM"]
        else:
            KPM = self.KPM_dict[set]

        self.model_list = []
        for prod_n in range(self.n_Produkte):
            if self.show_progress:
                load.print_progress(prod_n / self.n_Produkte, "make_model")

            target = KPM[:, prod_n]
            # The product being predicted must not be among its own features.
            data = np.delete(KPM, prod_n, axis=1)

            if self.use_user_info:
                data = np.hstack((data, user_info))

            # A fresh estimator per product.
            if self.model_type == "multinomial":
                model = MultinomialNB()
            elif self.model_type == "bernoulli":
                model = BernoulliNB()
            elif self.model_type == "complement":
                model = ComplementNB()
            else:  # "gaussian", guaranteed by the check above
                model = GaussianNB()

            model.fit(data, target)
            self.model_list.append(model)
 def save_batches(self, data, target):
     """Split ``data``/``target`` into batches and save them as ``.npy``.

     Batch ``i`` holds the samples ``i * batch_size`` up to (not including)
     ``(i + 1) * batch_size``; the last batch holds whatever remains. The
     files are written to ``<self.dataset>/batches/`` as
     ``data_batch_no_<i>.npy`` and ``target_batch_no_<i>.npy``.

     Args:
         data: Sequence of samples (sliceable, supports ``len``).
         target: Sequence of labels aligned with ``data``.

     Returns:
         The number of batches written, ``ceil(len(data) / batch_size)``.
     """
     # Integer ceiling division. Rounding ``len / batch_size + 0.5`` with
     # np.round is not a ceiling: halves are rounded to the nearest even
     # number, so e.g. 15 samples with batch size 5 gave 4 batches and an
     # empty trailing file.
     n_batches = int(-(-len(data) // self.batch_size))

     for batch_index in range(n_batches):
         if self.show_progress:
             load.print_progress(batch_index / n_batches, "save_batches")

         start = batch_index * self.batch_size
         stop = start + self.batch_size
         # Slicing clamps at the end of the sequence, so the (possibly
         # shorter) last batch needs no special case.
         np.save(
             self.dataset + "/batches/data_batch_no_" + str(batch_index) +
             ".npy", data[start:stop])
         np.save(
             self.dataset + "/batches/target_batch_no_" + str(batch_index) +
             ".npy", target[start:stop])
     return n_batches
    def make_model_approach_2(self, set="full"):
        """Train a single multi-class naive-Bayes model over all products.

        Every purchase yields one training sample: the customer's purchase
        vector with the purchased product set to 0 (optionally followed by
        the customer's user-info values) as features, and the index of that
        product as label. The model is fitted either in one go or, when
        ``self.batch_learning`` is set, by writing the samples to disk with
        ``save_batches`` and fitting incrementally with ``train_batches``.
        The result is stored in ``self.model``.

        Args:
            set: Key of the data set to train on.

        Raises:
            ValueError: If ``self.model_type`` is not one of
                ``"multinomial"``, ``"bernoulli"``, ``"complement"`` or
                ``"gaussian"``.
        """
        supported = ("multinomial", "bernoulli", "complement", "gaussian")
        # Validate before the expensive sample preparation; otherwise an
        # unknown type only surfaces afterwards as an unbound `model`.
        if self.model_type not in supported:
            raise ValueError(
                "unknown model_type {!r}; expected one of {}".format(
                    self.model_type, ", ".join(supported)))

        by_clients = self.split == "clients"

        if self.use_user_info:
            if by_clients:
                user_info = self.client_split_dict[set]["info"]
            else:
                user_info = self.info_dict[set]

        if by_clients:
            KPM = self.client_split_dict[set]["KPM"]
        else:
            KPM = self.KPM_dict[set]
        n_k = KPM.shape[0]

        target = []
        data = []

        for kunden_index in range(n_k):
            if self.show_progress:
                load.print_progress(kunden_index / n_k, "prepare data")

            for produkt_index in np.argwhere(KPM[kunden_index] == 1)[:, 0]:
                target.append(produkt_index)
                # Copy the row so that hiding the label product does not
                # modify the purchase matrix itself.
                var_Kunde = np.array(KPM[kunden_index])
                var_Kunde[produkt_index] = 0
                if self.use_user_info:
                    var_Kunde = np.hstack((var_Kunde, user_info[kunden_index]))

                data.append(var_Kunde)

        print("target", len(target))
        print("data", len(data))

        if self.model_type == "multinomial":
            model = MultinomialNB()
        elif self.model_type == "bernoulli":
            model = BernoulliNB()
        elif self.model_type == "complement":
            model = ComplementNB()
        else:  # "gaussian", guaranteed by the check above
            model = GaussianNB()

        print("a")
        if self.batch_learning:
            n_batches = self.save_batches(data, target)
            print("############################", n_batches,
                  " ########################################")
            print("now_train")
            model = self.train_batches(model, n_batches)
        else:
            model.fit(data, target)
        print("b")
        self.model = model