def _build(self, X, Y, I):
    """Populates the node list from the supplied samples.

    One node is created per (features, label) pair and appended to
    `self.nodes`; afterwards the number of features is read from the
    first stored node.

    Args:
        X (np.array): Features array.
        Y (np.array): Labels array. Each element must expose `.item()`.
        I (np.array): Indexes array, or None. When None, every node uses
            its position in `X` as identifier; otherwise `I[position].item()`.

    """

    # Decides once whether custom identifiers were supplied
    has_custom_indexes = I is not None

    # Walks through the samples and their labels in lockstep
    for position, (sample, target) in enumerate(zip(X, Y)):
        # Picks the identifier: the user-supplied one or the running position
        identifier = I[position].item() if has_custom_indexes else position

        # Builds the node and stores it
        node = Node(identifier, target.item(), sample)
        self.nodes.append(node)

    # Feature dimensionality comes from the first stored node
    # (assumes at least one node exists at this point)
    first_node = self.nodes[0]
    self.n_features = first_node.features.shape[0]
def fit(self, X_train, Y_train, X_unlabeled, I_train=None):
    """Fits data in the semi-supervised classifier.

    A subgraph is built from the labeled samples and its prototypes are
    found. Every unlabeled sample is then appended as an extra node, and a
    heap-driven competition assigns to each non-prototype node the
    predicted label of the node that offers it the smallest cost, where the
    cost is the maximum between the offering node's cost and the arc weight.

    Args:
        X_train (np.array): Array of training features.
        Y_train (np.array): Array of training labels.
        X_unlabeled (np.array): Array of unlabeled features.
        I_train (np.array): Array of training indexes.

    """

    logger.info('Fitting semi-supervised classifier ...')

    # Initializing the timer
    start = time.time()

    # Creating a subgraph from the labeled data and finding its prototypes
    self.subgraph = Subgraph(X_train, Y_train, I_train)
    self._find_prototypes()

    # Identifiers of the unlabeled nodes continue from the current node count
    current_n_nodes = self.subgraph.n_nodes

    for i, feature in enumerate(X_unlabeled):
        # Placeholder label only: it is replaced by the predicted label once
        # the node is conquered in the competition below.
        # NOTE(review): uses 1 rather than 0 on the assumption that Node
        # labels are 1-based -- confirm against Node's label validation.
        node = Node(current_n_nodes + i, 1, feature)
        self.subgraph.nodes.append(node)

    # The node list no longer changes, so the count is gathered only once
    n_nodes = self.subgraph.n_nodes

    # Creating a minimum heap
    h = Heap(size=n_nodes)

    for i in range(n_nodes):
        node = self.subgraph.nodes[i]

        if node.status == c.PROTOTYPE:
            # A prototype has no predecessor, keeps its true label
            # and enters the heap with zero cost
            node.pred = c.NIL
            node.predicted_label = node.label
            h.cost[i] = 0
            h.insert(i)
        else:
            # Every other node starts with the maximum possible cost
            h.cost[i] = c.FLOAT_MAX

    while not h.is_empty():
        # Removes a node, records the removal order and stores its cost
        p = h.remove()
        self.subgraph.idx_nodes.append(p)
        node_p = self.subgraph.nodes[p]
        node_p.cost = h.cost[p]

        for q in range(n_nodes):
            # Only distinct nodes that `p` could still improve are evaluated
            if p == q or not (h.cost[p] < h.cost[q]):
                continue

            node_q = self.subgraph.nodes[q]

            # Arc weight comes from the distance matrix or the distance function
            if self.pre_computed_distance:
                weight = self.pre_distances[node_p.idx][node_q.idx]
            else:
                weight = self.distance_fn(node_p.features, node_q.features)

            # The current cost will be the maximum cost between the node's and its weight (arc)
            current_cost = np.maximum(h.cost[p], weight)

            if current_cost < h.cost[q]:
                # `q` node has `p` as its predecessor and inherits its predicted label
                node_q.pred = p
                node_q.predicted_label = node_p.predicted_label

                # As we may have unlabeled nodes, make sure that `q` label
                # equals to `q` predicted label
                node_q.label = node_q.predicted_label

                # Updates the heap `q` node and the current cost
                h.update(q, current_cost)

    # The subgraph has been properly trained
    self.subgraph.trained = True

    end = time.time()
    train_time = end - start

    logger.info('Semi-supervised classifier has been fitted.')
    logger.info('Training time: %s seconds.', train_time)
def fit(self, X_train, Y_train, X_unlabeled, I_train=None):
    """Trains the semi-supervised classifier on labeled and unlabeled data.

    Labeled samples form the initial subgraph, from which prototypes are
    found. Unlabeled samples are appended as additional nodes, and every
    non-prototype node ends up with the predicted label of the node that
    offers it the smallest cost (the maximum between that node's cost and
    the weight of the connecting arc).

    Args:
        X_train (np.array): Array of training features.
        Y_train (np.array): Array of training labels.
        X_unlabeled (np.array): Array of unlabeled features.
        I_train (np.array): Array of training indexes.

    """

    logger.info('Fitting semi-supervised classifier ...')

    # Marks the moment the training starts
    start = time.time()

    # Labeled data becomes the subgraph, whose prototypes are then found
    self.subgraph = Subgraph(X_train, Y_train, I_train)
    self._find_prototypes()

    # Unlabeled nodes receive identifiers following the existing ones
    first_free_idx = self.subgraph.n_nodes
    for offset, sample in enumerate(X_unlabeled):
        unlabeled_node = Node(first_free_idx + offset, 1, sample)
        self.subgraph.nodes.append(unlabeled_node)

    # Minimum heap sized to hold every node of the subgraph
    heap = Heap(size=self.subgraph.n_nodes)

    # Seeds the heap: prototypes cost nothing, the rest cost the maximum
    for node_idx in range(self.subgraph.n_nodes):
        candidate = self.subgraph.nodes[node_idx]

        if candidate.status != c.PROTOTYPE:
            heap.cost[node_idx] = c.FLOAT_MAX
            continue

        # A prototype has no predecessor and predicts its own true label
        candidate.pred = c.NIL
        candidate.predicted_label = candidate.label
        heap.cost[node_idx] = 0
        heap.insert(node_idx)

    # Keeps conquering nodes until the heap runs out
    while not heap.is_empty():
        # Takes the next node, logs the removal order and stores its cost
        p = heap.remove()
        self.subgraph.idx_nodes.append(p)
        conqueror = self.subgraph.nodes[p]
        conqueror.cost = heap.cost[p]

        for q in range(self.subgraph.n_nodes):
            # Skips the node itself
            if p == q:
                continue

            # Skips nodes whose cost `p` is not smaller than
            if not (heap.cost[p] < heap.cost[q]):
                continue

            contender = self.subgraph.nodes[q]

            # Arc weight: looked up when pre-computed, calculated otherwise
            if self.pre_computed_distance:
                weight = self.pre_distances[conqueror.idx][contender.idx]
            else:
                weight = self.distance_fn(
                    conqueror.features, contender.features)

            # Offered cost is the larger of `p` cost and the arc weight
            current_cost = np.maximum(heap.cost[p], weight)

            # Only a strictly better offer conquers `q`
            if current_cost < heap.cost[q]:
                # `p` becomes the predecessor and hands over its prediction
                contender.pred = p
                contender.predicted_label = conqueror.predicted_label

                # Unlabeled nodes may exist, so the label mirrors the prediction
                contender.label = contender.predicted_label

                # Repositions `q` in the heap with its new cost
                heap.update(q, current_cost)

    # Flags the subgraph as trained
    self.subgraph.trained = True

    # Stops the timer and derives the elapsed training time
    end = time.time()
    train_time = end - start

    logger.info('Semi-supervised classifier has been fitted.')
    logger.info('Training time: %s seconds.', train_time)
import numpy as np

from opfython.core import Node

# Identifier given to the node
idx = 0

# Label given to the node
label = 1

# Feature vector carried by the node (four floating-point values)
features = np.array([2.0, 2.5, 1.5, 4.0])

# Instantiates the node from its identifier, label and features
n = Node(idx, label, features)