Example #1
    def train(self, X, y=None):
        # Convert input values to RavOp tensors
        X = Tensor(X, name="X")
        if y is not None:
            y = Tensor(y, name="y")

        # 2. Train
        # 3. Accuracy

        row_count = X.shape[0]
        column_count = X.shape[1]
        val = np.mean(np.arange(row_count))
        best_eval = float("inf")  # best split score seen so far
        min_leaf = Scalar(5)

        for c in range(column_count):
            x = X.output[:, c]  # all rows of feature column c

            for r in range(row_count):
                x1 = Tensor(x)
                r1 = Scalar(x[r])

                lhs = x1.less_equal(r1)
                rhs = x1.greater(r1)

                a = lhs.matsum().less(min_leaf)
                b = rhs.matsum().less(min_leaf)
                # skip splits that leave fewer than min_leaf samples on a side
                skip = a.logical_or(b)
                while skip.status != "computed":
                    pass
                if skip.output:
                    continue

        size = X.shape[1]
        no_samples = Scalar(X.shape[0])
        weights = Tensor(np.random.uniform(0, 1, size).reshape((size, 1)),
                         name="weights")

        y_pred = X.matmul(weights)
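The column/threshold scan above is the core of a decision-tree split search. For reference, a minimal NumPy-only sketch of the same check, assuming X is a plain 2-D array (scan_splits is an illustrative name, not part of RavOp):

    import numpy as np

    def scan_splits(X, min_leaf=5):
        """Yield (column, threshold) pairs that leave at least
        min_leaf samples on both sides of the split."""
        row_count, column_count = X.shape
        for c in range(column_count):
            x = X[:, c]                      # candidate feature column
            for r in range(row_count):
                lhs = x <= x[r]              # left split mask
                rhs = x > x[r]               # right split mask
                if lhs.sum() < min_leaf or rhs.sum() < min_leaf:
                    continue                 # too few samples on one side
                yield c, x[r]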
Example #2
    def __compute_cost(self, y, y_pred, no_samples, name="cost"):
        """Cost function: (1/(2n)) * sum((y_pred - y)^2)"""
        a = y_pred.sub(y)
        b = a.multiply(a).sum()
        cost = Scalar(1).div(Scalar(2).multiply(no_samples)).multiply(
            b, name=name)
        return cost
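For a quick sanity check, the same formula in plain NumPy, with made-up inputs (the values are illustrative only):

    import numpy as np

    y = np.array([[1.0], [2.0], [3.0]])
    y_pred = np.array([[1.1], [1.9], [3.2]])
    n = y.shape[0]

    # (1/2n) * sum of squared errors, matching __compute_cost above
    cost = (1.0 / (2 * n)) * np.sum((y_pred - y) ** 2)
    print(cost)  # ~0.01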
Example #3
    def train(self, X, y, iter=10):
        self.clean()

        # Convert input values to RavOp tensors
        X = Tensor(X, name="X")
        y = Tensor(y, name="y")

        # Initialize params
        learning_rate = Scalar(self.learning_rate)
        size = X.shape[1]
        no_samples = Scalar(X.shape[0])
        weights = Tensor(np.random.uniform(0, 1, size).reshape((size, 1)),
                         name="weights")

        # 1. Predict
        y_pred = X.matmul(weights, name="y_pred")

        # 2. Compute cost
        cost = self.__compute_cost(y, y_pred, no_samples)

        # 3. Gradient descent - Update weight values
        for i in range(iter):
            y_pred = X.matmul(weights, name="y_pred{}".format(i))
            c = X.trans().matmul(y_pred.sub(y))  # gradient: X^T (y_pred - y)
            d = learning_rate.div(no_samples)
            weights = weights.sub(c.elemul(d), name="weights{}".format(i))
            cost = self.__compute_cost(y,
                                       y_pred,
                                       no_samples,
                                       name="cost{}".format(i))

        return cost, weights
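The loop above is plain batch gradient descent on the MSE cost. A NumPy-only sketch of the same procedure, assuming X and y are arrays of shape (n, d) and (n, 1); train_numpy is an illustrative name, not part of RavOp:

    import numpy as np

    def train_numpy(X, y, lr=0.01, iters=10):
        n, d = X.shape
        w = np.random.uniform(0, 1, (d, 1))
        for _ in range(iters):
            y_pred = X @ w
            grad = X.T @ (y_pred - y) / n   # dMSE/dw
            w -= lr * grad
        cost = np.sum((X @ w - y) ** 2) / (2 * n)
        return cost, w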
Example #4
    def remove_less_significant_features(self, X, Y):
        """
        Remove less significant features using backward elimination.

        Parameters:
                    X = input features
                    Y = output
        Output:
                The dropped (less significant) feature columns
        """

        X = Tensor(X, name="X")
        Y = Tensor(Y, name="Y")

        sl = 0.05
        regression_ols = None
        columns_dropped = Tensor([])
        for i in range(0, len(X.output.columns)):

            regression_ols = sm.OLS(Y.output, X.output).fit()
            max_col = regression_ols.pvalues.idxmax()
            max_val = regression_ols.pvalues.max()

            if max_val > sl:  # the p-value and significance level are plain floats
                X.output.drop(max_col, axis='columns', inplace=True)
                columns_dropped.output = np.append(columns_dropped.output,
                                                   [max_col])
            else:
                break
        regression_ols.summary()

        return columns_dropped
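The same backward-elimination loop in plain pandas/statsmodels, without the Tensor wrappers (backward_eliminate is an illustrative name, and X is assumed to be a DataFrame):

    import pandas as pd
    import statsmodels.api as sm

    def backward_eliminate(X: pd.DataFrame, Y, sl=0.05):
        dropped = []
        while len(X.columns) > 0:
            model = sm.OLS(Y, X).fit()
            if model.pvalues.max() <= sl:
                break                          # all features significant
            max_col = model.pvalues.idxmax()   # least significant feature
            X = X.drop(columns=[max_col])
            dropped.append(max_col)
        return X, dropped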
Example #5
    def train(self, X, y, iter=10):
        # Remove old ops and start from scratch
        self.clean()

        # Convert input values to RavOp tensors
        X = Tensor(X, name="X")
        y = Tensor(y, name="y")

        # Initialize params
        learning_rate = Scalar(self._learning_rate)
        size = X.shape[1]
        no_samples = Scalar(X.shape[0])
        weights = Tensor(np.random.uniform(0, 1, size).reshape((size, 1)), name="weights")

        # 1. Predict - Calculate y_pred
        y_pred = self.sigmoid(X.matmul(weights), name="y_pred")

        # 2. Compute cost
        cost = self.__compute_cost(y, y_pred, no_samples)

        for i in range(iter):
            y_pred = self.sigmoid(X.matmul(weights), name="y_pred{}".format(i))
            weights = weights.sub(learning_rate.div(no_samples).elemul(X.trans().matmul(y_pred.sub(y))),
                                  name="weights{}".format(i))
            cost = self.__compute_cost(y=y, y_pred=y_pred, no_samples=no_samples, name="cost{}".format(i))

        return cost, weights
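For comparison, the same logistic-regression training loop in plain NumPy (illustrative names; shapes (n, d) and (n, 1) assumed):

    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    def train_logistic(X, y, lr=0.01, iters=10):
        n, d = X.shape
        w = np.random.uniform(0, 1, (d, 1))
        for _ in range(iters):
            y_pred = sigmoid(X @ w)
            # gradient of the cross-entropy cost wrt w
            w -= (lr / n) * (X.T @ (y_pred - y))
        return w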
Example #6
    def train(self, X, y):
        # Convert input values to RavOp tensors
        self.X = Tensor(X, name="X")
        self.y = Tensor(y, name="y")

        # Initialize params
        self.no_features = Scalar(X.shape[1], name="no_features")
        self.no_samples = Scalar(X.shape[0], name="no_samples")
        self.W = Tensor(np.zeros((self.no_features.output, 1)), name="W")
        self.b = Scalar(0, name="b")
        # self.weights = Tensor(np.random.uniform(0, 1, self.no_features).reshape((self.no_features, 1)), name="weights")

        # gradient descent learning

        for i in range(self.iterations):
            self.update_weights()

        return self
Example #7
    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=5):

        self.lr = Scalar(learning_rate)
        self.lambda_param = Scalar(lambda_param)
        self.n_iters = n_iters
        self.w = None
        self.b = None
Example #8
    def update_weights(self):
        y_pred = self.predict(self.X)

        # dW = -2/n * X^T (y - y_pred); db = -2/n * sum(y - y_pred)
        dW = Scalar(-1).multiply(Scalar(2).multiply(self.X.transpose().dot(self.y.sub(y_pred))).div(self.no_samples))
        db = Scalar(-2).multiply(R.sum(self.y.sub(y_pred))).div(self.no_samples)

        self.W = self.W.sub(self.learning_rate.multiply(dW), name="W")
        self.b = self.b.sub(self.learning_rate.multiply(db), name="b")

        return self
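The update implements dW = -2/n * X^T (y - y_pred) and db = -2/n * sum(y - y_pred). A NumPy check of the same step, under the assumption that X is (n, d), y and W are column vectors, and b is a scalar (the helper name is illustrative):

    import numpy as np

    def update_weights_numpy(X, y, W, b, lr):
        y_pred = X @ W + b
        n = X.shape[0]
        dW = -2.0 * (X.T @ (y - y_pred)) / n   # gradient wrt weights
        db = -2.0 * np.sum(y - y_pred) / n     # gradient wrt bias
        return W - lr * dW, b - lr * db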
Example #9
    def euclidean_distance(self, a, b):
        """
        Returns a scalar Euclidean distance value between two points on a 2-D plane.

        Parameters:
                    a = Point 1 on the plane
                    b = Point 2 on the plane

        Output:
                Scalar value for the distance between the two points.
        """
        a = Tensor(a, name="a")
        b = Tensor(b, name="b")
        sq_cal = square_root(((a.sub(b)).pow(Scalar(2))).sum(axis=1))
        while sq_cal.status != "computed":
            pass
        # NumPy equivalent: np.sqrt(np.sum((a - b) ** 2, axis=1))

        return sq_cal
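The corrected NumPy equivalent, with tiny made-up points as a sanity check:

    import numpy as np

    a = np.array([[0.0, 0.0], [1.0, 1.0]])
    b = np.array([[3.0, 4.0], [1.0, 1.0]])

    # row-wise Euclidean distance
    dist = np.sqrt(np.sum((a - b) ** 2, axis=1))
    print(dist)  # [5. 0.]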
Example #10
    def computing_cost(self, W, X, Y):
        """
        Calculates the regularised SVM cost (hinge loss plus an L2 penalty)
        for the given weights.

        Parameters:
                    W = Weights
                    X = Input Features
                    Y = Target Output

        Output:
                It returns the cost
        """
        W = Tensor(W, name="W")
        X = Tensor(X, name="X")
        Y = Tensor(Y, name="Y")

        N = X.shape[0]
        # hinge margins: 1 - y * (X @ W); NumPy form: 1 - Y * np.dot(X, W)
        distances = Scalar(1).sub(Y.elemul(X.dot(W)))
        while distances.status != "computed":
            pass
        d = distances.output
        d[d < 0] = 0  # max(0, distance)
        loss = Scalar(self.regularisation_parameter).mul(Scalar(np.sum(d) / N))
        # cost = 1/2 * ||W||^2 + regularised hinge loss
        cost = Scalar(0.5).mul(W.dot(W)).add(loss)

        return cost
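The same cost in plain NumPy, assuming 1-D W and Y and a 2-D X (svm_cost is an illustrative helper, not part of the class):

    import numpy as np

    def svm_cost(W, X, Y, reg):
        n = X.shape[0]
        distances = 1 - Y * (X @ W)     # hinge margins
        distances[distances < 0] = 0    # max(0, margin)
        hinge_loss = reg * (np.sum(distances) / n)
        return 0.5 * np.dot(W, W) + hinge_loss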
Example #11
    def score(self, X_test, y_test):
        """ 
        Used to measure performance of our algorithm

        Parameters:
                    X_test = Test data
                    y_test = Target Test Data

        Output:
                Returns the Score Value
        """
        # X_test = Tensor(X_test, name = "X_test")
        # y_test = y_test.reshape(len(y_test), 1)
        y_test = Tensor(y_test, name="y_test")
        print("\n Shape of y_test \n", y_test.shape)
        y_pred = Tensor(self.predict(X_test), name="y_pred")
        print("\n\n Prediction is ...\n\n", y_pred.output)

        eq = y_pred.equal(y_test)
        while eq.status != "computed":
            pass
        # fraction of correct predictions
        return float(np.sum(eq.output)) / float(len(y_test.output))
Example #12
    def calculate_cost_gradient(self, W, X_batch, Y_batch):
        """
        
        Calculates the gradient of the hinge-loss cost for one batch

        Parameters:
                    X_batch = Input features in batch or likewise depending on the type of gradient descent method used
                    Y_batch = Target features in batch or likewise depending on the type of gradient descent method used

        Output:
                Weights Derivatives

        """
        W = Tensor(W, name="W")
        X_batch = Tensor(X_batch, name="X_batch")
        Y_batch = Tensor(Y_batch, name="Y_batch")

        # if type(Y_batch) == np.float64:
        #     Y_batch = np.array([Y_batch])
        #     X_batch = np.array([X_batch])

        # hinge margins: 1 - y * (X @ W)
        distance = Scalar(1).sub(Y_batch.elemul(X_batch.dot(W)))
        dw = Tensor(np.zeros(len(W.output)), name="dw")

        for ind, d in enumerate(distance.output):

            if max(0, d) == 0:  # margin satisfied: gradient is just W
                di = W

            else:
                di = W.sub(
                    Scalar(self.regularisation_parameter).mul(
                        Tensor(Y_batch.output[ind] * X_batch.output[ind])))

            dw = dw.add(di)

        dw = dw.div(Scalar(len(Y_batch.output)))  # average over the batch

        return dw
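A NumPy-only version of the same gradient, useful for checking the op-based code above (names are illustrative; reg stands for the regularisation parameter):

    import numpy as np

    def cost_gradient(W, X_batch, Y_batch, reg):
        distance = 1 - Y_batch * (X_batch @ W)
        dw = np.zeros(len(W))
        for ind, d in enumerate(distance):
            if max(0, d) == 0:
                di = W                                    # margin satisfied
            else:
                di = W - reg * Y_batch[ind] * X_batch[ind]
            dw += di
        return dw / len(Y_batch)                          # batch average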
Example #13
    def Stochastic_gradient_descent(self, features, outputs):
        """
        
        SGD that considers a single sample at a time when updating the weights

        Parameters:
                    features = Input Features
                    outputs = outputs

        Output:
                weights


        """

        features = Tensor(features, name="features")
        outputs = Tensor(outputs, name="outputs")

        max_epochs = 5000
        weights = Tensor(np.zeros(features.shape[1]), name="weights")
        print(
            "\n\n----------------- STOCHASTIC GRADIENT DESCENT RUNNING -----------------\n\n"
        )

        nth = 0
        prev_cost = float("inf")
        print("\n Previous Cost:", prev_cost)
        cost_threshold = 0.01  # in percent
        # stochastic gradient descent
        for epoch in range(1, max_epochs):
            # shuffle to prevent repeating update cycles
            # (assumes sklearn.utils.shuffle, applied to the raw arrays)
            X, Y = shuffle(features.output, outputs.output)
            for ind, x in enumerate(X):
                ascent = self.calculate_cost_gradient(weights, x, Y[ind])
                weights = weights.sub((Scalar(self.learning_rate).mul(ascent)))

            # convergence check on every 2^nth epoch
            if epoch == 2 ** nth or epoch == max_epochs - 1:
                cost = self.computing_cost(weights, features, outputs)
                while cost.status != "computed":
                    pass
                print("Epoch is: {} and Cost is: {}".format(epoch, cost.output))
                # stoppage criterion: relative improvement below the threshold
                if abs(prev_cost - cost.output) < cost_threshold * prev_cost:
                    return weights
                prev_cost = cost.output
                nth += 1
        return weights
Example #14
    def fit(self, X, y):

        n_samples, n_features = X.shape
        y_ = y
        # y_ = R.where(y <= 0, -1, 1)
        self.w = R.Tensor(np.zeros(n_features))
        self.b = Scalar(0)

        for epoch in range(self.n_iters):
            print("Epoch: ",  epoch)
            for idx, x_i in enumerate(X):
                x_i = Tensor(x_i)
                y_i = Tensor([y_[idx]])
                val = y_i * (R.dot(x_i, self.w) - self.b)
                condition = R.greater_equal(val,  Scalar(1))
                while condition.status != 'computed':
                    pass
                if condition.output:
                    self.w = self.w - self.lr * (Scalar(2) * self.lambda_param * self.w)
                else:
                    self.w = self.w - self.lr * (Scalar(2) * self.lambda_param * self.w - R.mul(x_i, y_i))
                    self.b = self.b - (self.lr * y_i)
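The same per-sample update rule in plain NumPy, which may help verify the op-based loop (svm_fit is an illustrative name):

    import numpy as np

    def svm_fit(X, y, lr=0.001, lam=0.01, n_iters=5):
        n, d = X.shape
        w, b = np.zeros(d), 0.0
        for _ in range(n_iters):
            for x_i, y_i in zip(X, y):
                if y_i * (np.dot(x_i, w) - b) >= 1:
                    w -= lr * (2 * lam * w)               # regulariser only
                else:
                    w -= lr * (2 * lam * w - y_i * x_i)   # hinge term too
                    b -= lr * y_i
        return w, b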
Example #15
    def train(self, X, y=None):
        # Convert input values to RavOp tensors
        X = Tensor(X, name="X")
        if y is not None:
            y = Tensor(y, name="y")

        # 2. Train
        # 3. Accuracy

        size = X.shape[1]
        no_samples = Scalar(X.shape[0])
        weights = Tensor(np.random.uniform(0, 1, size).reshape((size, 1)),
                         name="weights")

        y_pred = X.matmul(weights)
Example #16
    def __compute_cost(self, y, y_pred, no_samples, name="cost"):
        """Binary cross-entropy cost function"""
        epsilon = Scalar(1e-5)
        one = Scalar(1)

        c1 = y.neg().trans().matmul(y_pred.add(epsilon).natlog())
        c2 = one.sub(y).trans().matmul(one.sub(y_pred).add(epsilon).natlog())
        cost = one.div(no_samples).elemul(c1.sub(c2), name=name)
        return cost
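This is the standard binary cross-entropy, cost = (1/n) * (-y^T log(p + eps) - (1 - y)^T log(1 - p + eps)). A NumPy check of the same formula, assuming 1-D arrays:

    import numpy as np

    def cross_entropy(y, y_pred, eps=1e-5):
        n = y.shape[0]
        return (1.0 / n) * (-(y @ np.log(y_pred + eps))
                            - ((1 - y) @ np.log(1 - y_pred + eps)))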
Example #17
    def eucledian_distance(self, X):
        X = R.expand_dims(X, axis=1)
        return R.square_root(R.sub(X, self.X_train).pow(Scalar(2)).sum(axis=2))
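The expand_dims trick computes all pairwise distances at once by broadcasting. A NumPy sketch of the same idea, with small made-up points:

    import numpy as np

    X = np.array([[0.0, 0.0], [3.0, 4.0]])        # query points
    X_train = np.array([[0.0, 0.0], [6.0, 8.0]])  # training points

    # (n, 1, d) - (m, d) broadcasts to (n, m, d); reduce over axis=2
    dists = np.sqrt(np.sum((X[:, np.newaxis, :] - X_train) ** 2, axis=2))
    print(dists)  # [[ 0. 10.]
                  #  [ 5.  5.]]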
Example #18
    def predict(self, X_test):
        """ 
        Predict labels for the given test data

        Parameters:
                    X_test = Data on which prediction has to be made

        Output:
                Gives you the Prediction

        """

        if self.weights == "uniform":
            neighbours = self.KNN_neighbours(X_test)
            print("\n neighbours \n", neighbours, "\n data type \n",
                  type(neighbours))
            # neighbours is a Tensor, use neighbours.output for converting to nd array
            # to understand bincount(), visit - https://i.stack.imgur.com/yAwym.png
            # print("\n\n what is neighbours here ...? \n\n", neighbours, "\n\n  What is the Data Type of Neighbours?\n\n", type(neighbours))
            # y_train_array = self.y_train

            y_pred = ([
                np.argmax(np.bincount(self.y_train[neighbour]))
                for neighbour in neighbours.output
            ])
            # y_pred = Tensor(y_pred, name = "y_pred_from uniform weights")

            print("\n y_pred \n", y_pred, "\n data type \n", type(y_pred))

            return y_pred

        if self.weights == "distance":

            # N nearest neighbours distance and indexes
            distance, neighbour_index = self.KNN_neighbours(
                X_test, return_distance=True)
            # X_test is array here not Tensor but returned variables are Tensors
            a = Scalar(1)
            print("\n Data type of distance before converting to Tensor \n",
                  type(distance))
            distance = Tensor(distance, name="distance tensor")
            print("\n distance being converted to Tensor: \n", distance)
            print("\n\n Shape of New Tensor Created \n\n", distance.shape)
            inverse_distance = a.div(distance)
            while inverse_distance.status != "computed":
                pass
            print("\n inverse_distance_first created \n", inverse_distance)

            mean_inverse_distance = inverse_distance.div(
                inverse_distance.sum(axis=1).output[:, np.newaxis])
            while mean_inverse_distance.status != "computed":
                pass

            print("\n mean_inverse_distance", mean_inverse_distance,
                  "data type of mean_inverse_distance",
                  type(mean_inverse_distance))

            mean_inverse_distance = Tensor(mean_inverse_distance.output,
                                           name="mean_inverse_distance")

            proba = []

            # running loop on K nearest neighbours elements only and selecting train for them
            for i, row in enumerate(mean_inverse_distance.output):

                row_pred = self.y_train[neighbour_index.output[i]]
                print("\n row_pred \n", row_pred, " \n data type \n",
                      type(row_pred))

                for k in range(self.n_classes):
                    # boolean mask of neighbours that belong to class k
                    indices = np.where(
                        (Tensor(row_pred, name="row_pred").equal(Scalar(k))).output)
                    print("\n indices \n", indices, " \n data type \n",
                          type(indices))
                    prob_ind = sum(row[indices])
                    print("\n prob_ind", prob_ind, "\n data type \n",
                          type(prob_ind))
                    proba.append(Tensor(prob_ind, name="prob_ind").output)
                    print(proba, "proba")

            predict_proba = Tensor(
                np.array(proba).reshape(X_test.shape[0], self.n_classes),
                name="proba")
            print("\n predict_proba \n", predict_proba, "\n data type \n",
                  type(predict_proba))
            y_pred = Tensor(
                [np.argmax(item) for item in predict_proba.output],
                name="y_pred")
            print("\n y_pred \n", y_pred, "\n data type \n", type(y_pred))

            return y_pred
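A compact NumPy sketch of the distance-weighted vote performed above, stripped of the Tensor plumbing (illustrative helper; assumes no zero distances, otherwise add a small epsilon):

    import numpy as np

    def predict_distance_weighted(distances, neighbour_index, y_train, n_classes):
        inv = 1.0 / distances                            # inverse distances
        weights = inv / inv.sum(axis=1, keepdims=True)   # normalise per query
        preds = []
        for w_row, idx_row in zip(weights, neighbour_index):
            labels = y_train[idx_row]
            scores = [w_row[labels == k].sum() for k in range(n_classes)]
            preds.append(int(np.argmax(scores)))
        return np.array(preds)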
Example #19
    def __euclidean_distance(self, X):
        X = R.expand_dims(X, axis=1, name="expand_dims")
        return R.square_root(R.sub(X, self._X).pow(Scalar(2)).sum(axis=2))
Example #20
    def closest_centroids(self, centroids):
        centroids = R.expand_dims(centroids, axis=1)
        return R.argmin(
            R.square_root(
                R.sub(self.points, centroids).pow(Scalar(2)).sum(axis=2)))
Example #21
    def sigmoid(self, x, name="sigmoid"):
        """Sigmoid activation function"""
        # 1 / (1 + e^-x)
        one = Scalar(1)
        return one.div(x.neg().exp().add(one), name=name)
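A quick NumPy check of the same expression:

    import numpy as np

    x = np.array([-2.0, 0.0, 2.0])
    print(1.0 / (1.0 + np.exp(-x)))  # [0.11920292 0.5 0.88079708]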
Example #22
    def eucledian_distance(self, X, Y):
        return R.square_root(((R.sub(X, Y)).pow(Scalar(2))).sum(axis=0))
Example #23
class LinearRegression(Graph):
    def __init__(self, id=None, **kwargs):
        super().__init__(id=id, **kwargs)

        self.__setup_logger()

        # Define hyper-parameters
        self.learning_rate = R.Scalar(kwargs.get("learning_rate", 0.01), name="learning_rate")
        self.iterations = kwargs.get("iterations", 100)

        self.X = None
        self.y = None
        self.W = None
        self.b = None
        self.no_samples = None
        self.no_features = None

    def __setup_logger(self):
        # Set up a specific logger with our desired output level
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.setLevel(logging.DEBUG)

        # Add the log message handler to the logger
        handler = logging.handlers.RotatingFileHandler(RAVML_LOG_FILE)

        self.logger.addHandler(handler)

    def train(self, X, y):
        # Convert input values to RavOp tensors
        self.X = Tensor(X, name="X")
        self.y = Tensor(y, name="y")

        # Initialize params
        self.no_features = Scalar(X.shape[1], name="no_features")
        self.no_samples = Scalar(X.shape[0], name="no_samples")
        self.W = Tensor(np.zeros((self.no_features.output, 1)), name="W")
        self.b = Scalar(0, name="b")
        # self.weights = Tensor(np.random.uniform(0, 1, self.no_features).reshape((self.no_features, 1)), name="weights")

        # gradient descent learning

        for i in range(self.iterations):
            self.update_weights()

        return self

    def predict(self, X, weights=None):
        """Predict values"""
        return R.matmul(X, self.weights).add(self.bias)

    def update_weights(self):
        y_pred = self.predict(self.X)

        dW = Scalar(-1).multiply(Scalar(2).multiply(self.X.transpose().dot(self.y.sub(y_pred))).div(self.no_samples))
        db = Scalar(-2).multiply(R.sum(self.y.sub(y_pred))).div(self.no_samples)

        self.W = self.W.sub(self.learning_rate.multiply(dW), name="W")
        self.b = self.b.sub(self.learning_rate.multiply(db), name="b")

        return self

    def __compute_cost(self, y, y_pred, no_samples, name="cost"):
        """Cost function"""
        return R.multiply(R.Scalar(1.0 / (2.0 * no_samples.output)), R.sum(R.square(R.sub(y_pred, y))), name=name)
        # a = y_pred.sub(y)
        # b = R.square(a).sum()
        # R.one()
        # cost = R.one().div(Scalar(2).multiply(no_samples)).multiply(b, name=name)
        # return cost

    @property
    def weights(self):
        """Retrieve weights"""
        if self.W is not None:
            return self.W

        ops = self.get_ops_by_name(op_name="W", graph_id=self.id)
        if len(ops) == 0:
            raise Exception("You need to train your model first")

        # Get weights
        weight_op = ops[-1]
        if weight_op.status == "pending" or weight_op.status == "computing":
            raise Exception("Please wait. Your model is getting trained")

        return weight_op

    @property
    def bias(self):
        """Retrieve bias"""
        if self.b is not None:
            return self.b

        ops = self.get_ops_by_name(op_name="b", graph_id=self.id)
        if len(ops) == 0:
            raise Exception("You need to train your model first")

        # Get weights
        b_op = ops[-1]
        if b_op.status == "pending" or b_op.status == "computing":
            raise Exception("Please wait. Your model is getting trained")

        return b_op

    def score(self, X, y, name="r2"):
        g.graph_id = None
        if not isinstance(X, R.Tensor):
            X = R.Tensor(X)
        if not isinstance(y, R.Tensor):
            y = R.Tensor(y)

        y_pred = self.predict(X)
        y_true = y

        if name == "r2":
            return metrics.r2_score(y_true, y_pred)
        else:
            return None

    def __str__(self):
        return "LinearRegression:Graph Id:{}\n".format(self.id)
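A hypothetical usage sketch for the class above, assuming a configured RavOp backend (the data and hyper-parameters are illustrative):

    import numpy as np

    X = np.random.rand(100, 3)        # 100 samples, 3 features
    y = 2 * X[:, :1] + 0.5            # synthetic targets, shape (100, 1)

    model = LinearRegression(learning_rate=0.01, iterations=100)
    model.train(X, y)
    print(model.weights, model.bias)  # trained parameter ops
    print(model.score(X, y, name="r2"))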