Exemplo n.º 1
0
    def model_selection_graphs(self, list_graphs):
        """Re-partition the key sets of every graph in ``list_graphs``.

        The first graph is treated specially: every article for which a
        per-article fit (with more than 5 responses) leaves more than one
        entry in ``model_`` is split off into its own single-article graph,
        and the remaining articles stay together in one graph. Each later
        graph keeps its articles and only has its key sets re-partitioned.

        Args:
            list_graphs: Sequence of graph objects exposing ``all_keys``,
                ``get_list_keys_sets`` and ``get_articles``.

        Returns:
            A list of ``Graph`` objects: the reduced first graph, then the
            single-article graphs, then one graph per remaining input graph
            in input order. An empty list when ``list_graphs`` is empty.
        """
        if len(list_graphs) == 0:
            return []

        logistic = HighDimensionalLogisticRegression(fit_intercept=False)

        def partition_keys(article_group, fit_keys, all_keys):
            """Fit twice on ``article_group``; return the non-empty key sets."""
            X, y = self.get_article_covariates(article_group, fit_keys)
            logistic.fit(X, y)
            ones, zeros = Util.zeros_and_ones(logistic.model_, logistic.coef_)
            second_key_set = self.get_sets(ones, zeros)

            # Refit without the keys selected above. The refit must happen
            # before model_/coef_ are read again, otherwise the third key set
            # is just a copy of the second one.
            X, y = self.get_article_covariates(article_group,
                                               fit_keys - second_key_set)
            logistic.fit(X, y)
            ones, zeros = Util.zeros_and_ones(logistic.model_, logistic.coef_)
            third_key_set = self.get_sets(ones, zeros)

            # NOTE(review): the partition is taken from ``all_keys`` even when
            # the fit used a narrower ``fit_keys``, and the middle entry is
            # ``second - third`` rather than ``third``. Both are kept as
            # found; confirm that this is intended.
            if (len(third_key_set) != len(second_key_set)
                    and len(third_key_set) != 0):
                candidates = [
                    all_keys - third_key_set - second_key_set,
                    second_key_set - third_key_set,
                    second_key_set,
                ]
            else:
                candidates = [all_keys - second_key_set, second_key_set]

            return [key_set for key_set in candidates if len(key_set) > 0]

        # ---- first graph: find the articles that get a graph of their own
        first_graph = list_graphs[0]
        keys_set = first_graph.all_keys
        list_of_keys_sets = first_graph.get_list_keys_sets

        articles = set()
        for key_set_in_the_list in list_of_keys_sets:
            for article in first_graph.get_articles:
                X, y = self.get_article_covariates(article,
                                                   key_set_in_the_list)
                if len(y) > 5:
                    logistic.fit(X, y)
                    if len(logistic.model_) > 1:
                        articles.add(article)

        graphs = []
        for article in articles:
            key_sets = partition_keys(article, keys_set, keys_set)
            graphs.append(Graph({article}, key_sets, self))  # collector

        # ---- first graph: whatever was not split off stays together
        remaining_articles = first_graph.get_articles - articles
        list_ = []
        for key_set_in_the_list in list_of_keys_sets:
            list_.extend(
                partition_keys(remaining_articles, key_set_in_the_list,
                               keys_set))

        final_graphs_list = [Graph(remaining_articles, list_, self)]
        final_graphs_list.extend(graphs)

        # ---- later graphs: position in the list of graphs matters
        for graph in list_graphs[1:]:
            # Read once per graph so the Graph below never sees a stale or
            # unbound value when the graph has no key sets.
            graph_articles = graph.get_articles
            graph_keys = graph.all_keys

            list_ = []
            for key_set_in_the_list in graph.get_list_keys_sets:
                list_.extend(
                    partition_keys(graph_articles, key_set_in_the_list,
                                   graph_keys))

            final_graphs_list.append(Graph(graph_articles, list_, self))

        return final_graphs_list
Exemplo n.º 2
0
    def model_selection_graphs(self, list_graphs):
        """Split the articles into graphs based on per-article fits.

        Each article with more than 5 responses is fitted on its own.
        Articles whose fit leaves more than one entry in ``model_`` get a
        single-article graph each. Articles whose ``coef_[0, 0]`` equals the
        largest value seen, and that were not split off, form one extra
        "emergency" graph. All other articles share one graph.

        Args:
            list_graphs: Unused; kept so existing callers keep working.

        Returns:
            A list of ``Graph`` objects: the graph of the remaining articles
            first, then the single-article graphs, then the emergency graph
            if there is one. When no article is split off, a single graph
            holding all articles and the full key set.
        """
        logistic = HighDimensionalLogisticRegression(fit_intercept=False)
        keys_set = self.all_keys

        articles = set()
        article_indexes = []
        article_compe = []
        model_bool = np.array([]).astype(int)

        for article in self.all_articles:
            X, y = self.get_article_covariates(article, keys_set)
            if len(y) > 5:
                logistic.fit(X, y)
                article_indexes.append(article)
                article_compe.append(logistic.coef_[0, 0])
                if len(logistic.model_) > 1:
                    articles.add(article)
                    model_bool = np.append(model_bool, logistic.model_)

        if len(articles) == 0:
            return [Graph(self.all_articles, [keys_set], self)]

        # np.unique already works on the flattened array.
        model_bool = np.unique(model_bool) if model_bool.shape[0] > 0 else None

        # Articles tied for the largest leading coefficient, unless they
        # already get a graph of their own.
        n_ = len(article_compe)
        places = np.sort(article_compe)[(n_ - 1):n_]
        article_indexes = np.array(article_indexes)
        top_articles = {
            int(article)
            for article in article_indexes[np.isin(article_compe, places)]
        }
        articles_emergency = top_articles - articles

        def split_keys(article_group):
            """Fit on ``article_group``; return the non-empty key sets."""
            X, y = self.get_article_covariates(article_group, keys_set)
            logistic.fit(X, y)
            ones, zeros = Util.zeros_and_ones(logistic.model_, logistic.coef_)
            second_key_set = self.get_sets(ones, zeros, keys_set)

            if (len(keys_set) != len(second_key_set)
                    and len(second_key_set) != 0):
                candidates = [keys_set - second_key_set, second_key_set]
            else:
                candidates = [keys_set]
            return [key_set for key_set in candidates if len(key_set) > 0]

        graphs = []
        for article in articles:
            article_ = {article}
            graphs.append(
                Graph(article_, split_keys(article_), self,
                      model_bool))  # collector

        if len(articles_emergency) > 0:
            graphs.append(
                Graph(articles_emergency, [keys_set], self,
                      model_bool))  # collector

        remaining_articles = (self.all_articles - articles
                              - articles_emergency)
        remaining_graph = Graph(remaining_articles,
                                split_keys(remaining_articles), self,
                                model_bool)  # collector

        return [remaining_graph] + graphs
Exemplo n.º 3
0
    def model_selection_graphs(self, list_graphs):
        """Build one graph for the bulk of the articles plus per-article graphs.

        A fit over all articles yields a screening key set. If that set is
        neither empty nor as large as the full key set, each article is
        fitted against it, and those with more than 5 responses whose fit
        leaves more than one entry in ``model_`` are flagged. Every flagged
        article gets its own graph; the other articles share one.

        ``list_graphs`` is not read.

        Returns a list with the shared graph first, followed by the
        single-article graphs.
        """
        classifier = HighDimensionalLogisticRegression(fit_intercept=False)
        every_key = self.all_keys

        def select_keys(article_group):
            # Fit on the group over the full key set and map the fit to keys.
            design, response = self.get_article_covariates(
                article_group, every_key)
            classifier.fit(design, response)
            ones, zeros = Util.zeros_and_ones(classifier.model_,
                                              classifier.coef_)
            return self.get_sets(ones, zeros, every_key)  # keys set

        def non_empty_partition(selected):
            # Two-way split when the selection is a usable size, otherwise
            # the full key set; empty parts are dropped.
            if len(every_key) == len(selected) or len(selected) == 0:
                parts = [every_key]
            else:
                parts = [every_key - selected, selected]
            return [part for part in parts if len(part) > 0]

        screening_keys = select_keys(self.all_articles)

        flagged = set()
        if len(every_key) != len(screening_keys) and len(screening_keys) != 0:
            for article in self.all_articles:
                design, response = self.get_article_covariates(
                    article, screening_keys)
                if len(response) <= 5:
                    continue
                classifier.fit(design, response)
                if len(classifier.model_) > 1:
                    flagged.add(article)

        per_article_graphs = []
        for article in flagged:
            singleton = {article}
            key_sets = non_empty_partition(select_keys(singleton))
            per_article_graphs.append(Graph(singleton, key_sets,
                                            self))  # collector

        shared_key_sets = non_empty_partition(
            select_keys(self.all_articles - flagged))
        shared_graph = Graph(self.all_articles - flagged, shared_key_sets,
                             self)  # collector

        return [shared_graph] + per_article_graphs
Exemplo n.º 4
0
    def model_selection_graphs(self, list_graphs):
        """Split the articles into a "top" graph and a "rest" graph.

        For every article the mean of its response vector is recorded (0 when
        the vector is empty). Walking the means from largest to smallest,
        articles are counted as "top" while the mean stays at or above a
        threshold built from the average, standard deviation and skewness of
        all means and the module-level ``para1``, ``para2`` and ``para3``.

        Module-level state:
            FEW: Overwritten with the articles whose response vector has at
                most 15 entries.
            MODEL: Must already exist. Extended with the model entries
                selected by a fit over all articles, then de-duplicated.
            para1, para2, para3: Read only; threshold weights.

        Args:
            list_graphs: Unused; kept so existing callers keep working.

        Returns:
            ``[rest_graph, top_graph]`` when at least one but not every
            article is "top" and ``MODEL`` is non-empty; otherwise a single
            graph holding all articles and the full key set.
        """
        global MODEL
        global FEW
        global para1
        global para2
        global para3

        logistic = HighDimensionalLogisticRegression(fit_intercept=False)
        keys_set = self.all_keys

        # Never populated here; it only feeds the debug line below.
        articles = set()
        article_indexes = []
        article_compe = []
        model_bool = np.array([]).astype(int)

        FEW = []
        for article in self.all_articles:
            X, y = self.get_article_covariates(article, keys_set)
            article_indexes.append(article)
            article_compe.append(0 if len(y) == 0 else np.mean(y))
            if len(y) <= 15:
                FEW.append(article)

        # Bail out before computing statistics: with no articles, or with
        # all-zero means, std and skewness carry no information.
        if len(article_compe) == 0 or sum(article_compe) == 0.0:
            return [Graph(self.all_articles, [keys_set], self, model_bool)]

        std = np.std(article_compe)
        ave = np.mean(article_compe)
        skewness = stats.skew(article_compe)
        ordered_compe = np.sort(article_compe)[::-1]

        # The threshold rises with every article already counted as "top".
        n_top = 0
        while len(ordered_compe) > n_top and ordered_compe[n_top] >= (
                ave + para1 * std + para2 * n_top * std + para3 *
            (n_top + 1) * abs(skewness)):
            n_top += 1

        print(articles, skewness, std, ave, n_top, len(article_compe), '902')

        places = ordered_compe[:n_top]
        article_indexes = np.array(article_indexes)
        result = {
            int(article)
            for article in article_indexes[np.isin(article_compe, places)]
        }

        X, y = self.get_article_covariates(self.all_articles, keys_set)
        logistic.fit(X, y)

        if len(logistic.model_) > 1:
            # Keep the model entries whose non-zero coefficient is positive,
            # then drop entry 0.
            model_bool = np.append(
                model_bool,
                logistic.model_[logistic.coef_[logistic.coef_ != 0] > 0])
            model_bool = np.setdiff1d(model_bool, 0)

        if model_bool.shape[0] > 0:
            model_bool = np.unique(model_bool)
        else:
            model_bool = None

        if model_bool is not None:
            MODEL.extend(model_bool)
            MODEL = list(np.unique(MODEL))

        print(model_bool, MODEL, '893')

        if 0 < n_top < len(self.all_articles) and len(MODEL) > 0:
            graph2 = Graph(result, [keys_set], self, model_bool)  # collector
            graph3 = Graph(self.all_articles - result, [keys_set], self,
                           model_bool)  # collector
            return [graph3, graph2]

        return [Graph(self.all_articles, [keys_set], self, model_bool)]
Exemplo n.º 5
0
    def model_selection_graphs(self):
        """Return a list of one or two graphs partitioning the active keys.

        Each article with more than 5 responses is fitted on its own, and
        those whose fit leaves more than one entry in ``model_`` are
        collected. If any are found, the result is
        ``[graph_of_other_articles, graph_of_collected_articles]``, each with
        key sets from a two-stage fit. Otherwise a single graph over all
        articles is returned, with the keys split by one fit on
        ``get_covariates``, or left whole when that fit selects no keys or
        all of them.
        """
        keys_set = self.__all_active_keys

        logistic = HighDimensionalLogisticRegression(fit_intercept=False)

        def two_stage_key_sets(article_group):
            """Fit twice on ``article_group``; return the non-empty key sets."""
            X, y = self.get_article_covariates(article_group, keys_set)
            logistic.fit(X, y)
            ones, zeros = Util.zeros_and_ones(logistic.model_, logistic.coef_)
            second_key_set = self.get_sets(ones, zeros)

            # Refit without the keys selected above. The refit must happen
            # before model_/coef_ are read again, otherwise the extra key
            # set is just a copy of the second one.
            X, y = self.get_article_covariates(article_group,
                                               keys_set - second_key_set)
            logistic.fit(X, y)
            ones, zeros = Util.zeros_and_ones(logistic.model_, logistic.coef_)
            extra_key_set = self.get_sets(ones, zeros)

            candidates = [
                keys_set - second_key_set - extra_key_set,
                extra_key_set | second_key_set,
            ]
            return [key_set for key_set in candidates if len(key_set) > 0]

        articles = set()
        for article in self.all_articles:
            X, y = self.get_article_covariates(article, keys_set)
            if len(y) > 5:
                logistic.fit(X, y)
                if len(logistic.model_) > 1:
                    articles.add(article)

        if len(articles) > 0:
            print('article more than 0', '622')

            # Second graph: the collected articles.
            graph2 = Graph(articles, two_stage_key_sets(articles),
                           self)  # collector

            # First graph: everything else.
            other_articles = self.all_articles - articles
            graph1 = Graph(other_articles,
                           two_stage_key_sets(other_articles),
                           self)  # collector

            return [graph1, graph2]

        X, y = self.get_covariates(keys_set)
        logistic.fit(X, y)
        ones, zeros = Util.zeros_and_ones(logistic.model_, logistic.coef_)
        extra_key_set = self.get_sets(ones, zeros)

        # Nothing to split when the fit selects no keys or every key.
        if len(extra_key_set) == len(keys_set) or len(extra_key_set) == 0:
            return [Graph(self.all_articles, [keys_set], self)]

        candidates = [keys_set - extra_key_set, extra_key_set]
        list_keys = [key_set for key_set in candidates if len(key_set) > 0]

        return [Graph(self.all_articles, list_keys, self)]  # collector