Example #1
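# All of these snippets rely on module-level helpers and constants that the
# excerpts do not show: SEED_, ANGLE_, rand_point_generator and
# get_transformation. Below is a minimal sketch consistent with how they are
# called; the constant values and cluster centres are assumptions, and the
# exact originals may differ.
import math

import numpy as np
import matplotlib.pyplot as plt

SEED_ = 42            # assumed value; the original constant is not shown
ANGLE_ = math.pi / 4  # assumed value; the original constant is not shown


def rand_point_generator(point_num=50):
    # two Gaussian blobs; the last column carries the +1 / -1 label,
    # matching how the loss functions index data_vec[-1]
    pos = np.hstack([np.random.randn(point_num, 2) * 0.3 + 1.0,
                     np.ones((point_num, 1))])
    neg = np.hstack([np.random.randn(point_num, 2) * 0.3 - 1.0,
                     -np.ones((point_num, 1))])
    return pos, neg


def get_transformation(angle):
    # standard 2x2 rotation matrix for the given angle
    return np.array([[math.cos(angle), -math.sin(angle)],
                     [math.sin(angle), math.cos(angle)]])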
def generate_graph(w_vec_list=None):
    # plot the rotated dataset together with the candidate weight vectors

    np.random.seed(seed=SEED_)
    pos_data_points, neg_data_points = rand_point_generator(point_num=50)

    rotation_matrix = get_transformation(angle=ANGLE_)
    pos_transformed = np.dot(pos_data_points[:, 0:2], rotation_matrix)
    neg_transformed = np.dot(neg_data_points[:, 0:2], rotation_matrix)

    fig = plt.figure(1)
    plt.scatter([x[0] for x in pos_transformed],
                [x[1] for x in pos_transformed],
                c='r',
                marker='^')
    plt.scatter([x[0] for x in neg_transformed],
                [x[1] for x in neg_transformed],
                c='b',
                marker='^')

    # untransformed points, kept for comparison:
    # plt.scatter([x[0] for x in pos_data_points], [x[1] for x in pos_data_points], c='r')
    # plt.scatter([x[0] for x in neg_data_points], [x[1] for x in neg_data_points], c='b')
    interval_for_plot = np.arange(-2, 3)
    # draw a line through the origin along each rotated weight vector
    for vec_idx_, w_ in enumerate(w_vec_list):
        w_transformed = np.transpose(np.dot(np.transpose(w_), rotation_matrix))
        x_table_tmp = []
        y_table_tmp = []
        for points in interval_for_plot:
            x_table_tmp.append(points * w_transformed[0])
            y_table_tmp.append(points * w_transformed[1])
        plt.plot(x_table_tmp, y_table_tmp)
    plt.show()
def logistic_loss_1(w):
    '''Logistic loss over the negatively-labelled points only.

    The dataset is regenerated from the fixed seed, so repeated calls
    (e.g. from a numerical Hessian) always see the same points. Updated
    to test metric 2, probing a more specific relationship between the
    Hessian and the margin. An earlier version used a single toy point,
    x = np.array([2, 0, 0]).

    input:  weight vector w of length 2
    output: mean logistic loss over the dataset
    '''
    tmp_loss = 0
    # earlier toy datasets, kept for reference:
    # x = np.array([[0, 2, 1], [2, 0, 0]])
    # x = np.array([[2, 0, -1]])
    # x = np.array([[1, -1, -1]])
    np.random.seed(seed=SEED_)
    _, neg_data_points = rand_point_generator(point_num=50)
    x = neg_data_points
    # mean logistic loss: (1/n) * sum_i log(1 + exp(-y_i * w^T x_i))
    for idx, data_vec in enumerate(x):
        tmp_loss += math.log(1 +
                             math.exp(-data_vec[-1] *
                                      np.dot(np.transpose(w), data_vec[0:-1])))
    return 1 / float(x.shape[0]) * tmp_loss
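# math.exp overflows once -y_i * w^T x_i exceeds roughly 709, so the loop
# above can raise OverflowError for large weights. np.logaddexp(0, z)
# evaluates log(1 + exp(z)) stably; a hedged drop-in variant (the name
# logistic_loss_stable is ours, not from the original code):
def logistic_loss_stable(w):
    np.random.seed(seed=SEED_)
    _, neg_data_points = rand_point_generator(point_num=50)
    x = neg_data_points
    # -y_i * w^T x_i for every row at once
    margins = -x[:, -1] * np.dot(x[:, :-1], w)
    # mean of log(1 + exp(margin)), computed without overflow
    return float(np.mean(np.logaddexp(0.0, margins)))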
def logcosh(w):
    '''Log-cosh loss: the mean of log(cosh(w^T x - y)) over the dataset.'''
    tmp_loss = 0
    # earlier toy datasets, kept for reference:
    # X = np.array([[0, 2, 1], [2, 0, 0]])
    # X = np.array([[0, 2, 1], [2, 0, -1]])
    np.random.seed(seed=SEED_)
    pos_data_points, neg_data_points = rand_point_generator(point_num=50)
    X = np.concatenate((pos_data_points, neg_data_points), axis=0)
    # mean log-cosh loss: (1/n) * sum_i log(cosh(w^T x_i - y_i))
    for idx, data_vec in enumerate(X):
        tmp_loss += math.log(
            math.cosh(np.dot(np.transpose(w), data_vec[0:-1]) - data_vec[-1]))
    return 1 / float(X.shape[0]) * tmp_loss
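# log(cosh(z)) grows like z**2 / 2 near zero and like |z| - log(2) for large
# |z|, which is what makes log-cosh a robust loss. math.cosh itself overflows
# for |z| above roughly 710, so a stable form helps if margins can get large;
# a minimal sketch (the name logcosh_stable is ours, not from the original
# code):
def logcosh_stable(z):
    # identity: log(cosh(z)) = |z| + log1p(exp(-2|z|)) - log(2)
    z = np.abs(z)
    return z + np.log1p(np.exp(-2.0 * z)) - np.log(2.0)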
def generate_graph(w_vec_list=None):
    # plot the raw (unrotated) dataset together with the candidate weight vectors

    np.random.seed(seed=SEED_)
    pos_data_points, neg_data_points = rand_point_generator(point_num=50)
    fig = plt.figure(1)
    plt.scatter([x[0] for x in pos_data_points],
                [x[1] for x in pos_data_points],
                c='r')
    plt.scatter([x[0] for x in neg_data_points],
                [x[1] for x in neg_data_points],
                c='b')
    interval_for_plot = np.arange(-2, 50)
    # draw a line through the origin along each weight vector
    for vec_idx_, w_ in enumerate(w_vec_list):
        x_table_tmp = []
        y_table_tmp = []
        for points in interval_for_plot:
            x_table_tmp.append(points * w_[0])
            y_table_tmp.append(points * w_[1])
        plt.plot(x_table_tmp, y_table_tmp)
    plt.show()
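# Example usage of the plotting helper above; the weight vectors here are
# arbitrary illustrative values, not from the original code:
w_candidates = [np.array([1.0, 1.0]), np.array([0.5, -1.0])]
generate_graph(w_vec_list=w_candidates)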
Example #5
def logistic_loss(w):
    '''Logistic loss over the rotated dataset.

    The points are regenerated from the fixed seed, rotated by ANGLE_,
    and re-labelled +1 / -1. An earlier version used a tiny two-point
    set, e.g. x = np.array([[0, 2, 1], [2, 0, 0]]).

    input:  weight vector w of length 2
    output: mean logistic loss over the dataset
    '''
    tmp_loss = 0
    # earlier toy datasets, kept for reference:
    # x = np.array([[0, 2, 1], [2, 0, 0]])
    # x = np.array([[0, 2, 1], [2, 0, -1]])
    # x = np.array([[-0.5, 1, 1], [0.7, -0.5, -1]])
    np.random.seed(seed=SEED_)
    pos_data_points, neg_data_points = rand_point_generator(point_num=50)

    rotation_matrix = get_transformation(angle=ANGLE_)
    pos_transformed = np.dot(pos_data_points[:, 0:2], rotation_matrix)
    neg_transformed = np.dot(neg_data_points[:, 0:2], rotation_matrix)
    x = np.concatenate((pos_transformed, neg_transformed), axis=0)
    # re-attach labels: the first half of x is positive, the second negative
    x_new = np.zeros((x.shape[0], 3))
    for idx_x_p, x_p in enumerate(x):
        if idx_x_p < pos_transformed.shape[0]:
            x_new[idx_x_p] = np.append(x_p, 1)
        else:
            x_new[idx_x_p] = np.append(x_p, -1)
    #x_ = np.concatenate((pos_data_points, neg_data_points), axis=0)
    #y_ = x[:,-1]

    # mean logistic loss: (1/n) * sum_i log(1 + exp(-y_i * w^T x_i))
    for idx, data_vec in enumerate(x_new):
        tmp_loss += math.log(1 +
                             math.exp(-data_vec[-1] *
                                      np.dot(np.transpose(w), data_vec[0:-1])))


    # tmp_loss += math.log(1 + math.exp(-y_[idx] * np.dot(np.transpose(w), data_vec)))
    return 1 / float(x.shape[0]) * tmp_loss
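# The per-row labelling loop above can be collapsed into one vectorized step;
# a sketch equivalent to the construction inside logistic_loss (attach_labels
# is our name, not part of the original code):
def attach_labels(pos_pts, neg_pts):
    # positive rows first with label +1, then negative rows with label -1,
    # exactly as the loop above assumes
    pts = np.concatenate((pos_pts, neg_pts), axis=0)
    labels = np.concatenate((np.ones(len(pos_pts)), -np.ones(len(neg_pts))))
    return np.column_stack((pts, labels))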
def logistic_loss(w):
    '''Logistic loss over the unrotated dataset.

    The points are regenerated from the fixed seed; each row of x is
    (x1, x2, label). An earlier version used a tiny two-point set,
    e.g. x = np.array([[0, 2, 1], [2, 0, 0]]).

    input:  weight vector w of length 2
    output: mean logistic loss over the dataset
    '''
    tmp_loss = 0
    # earlier toy datasets, kept for reference:
    # x = np.array([[0, 2, 1], [2, 0, 0]])
    # x = np.array([[0, 2, 1], [2, 0, -1]])
    # x = np.array([[-0.5, 1, 1], [0.7, -0.5, -1]])
    np.random.seed(seed=SEED_)
    pos_data_points, neg_data_points = rand_point_generator(point_num=50)
    x = np.concatenate((pos_data_points, neg_data_points), axis=0)
    # mean logistic loss: (1/n) * sum_i log(1 + exp(-y_i * w^T x_i))
    for idx, data_vec in enumerate(x):
        tmp_loss += math.log(1 +
                             math.exp(-data_vec[-1] *
                                      np.dot(np.transpose(w), data_vec[0:-1])))
    return 1 / float(x.shape[0]) * tmp_loss
Example #7
def logcosh(w):
    '''Log-cosh loss over the rotated dataset: mean of log(cosh(w^T x - y)).'''
    tmp_loss = 0
    # earlier toy datasets, kept for reference:
    # X = np.array([[0, 2, 1], [2, 0, 0]])
    # X = np.array([[0, 2, 1], [2, 0, -1]])
    np.random.seed(seed=SEED_)
    pos_data_points, neg_data_points = rand_point_generator(point_num=50)
    #	X = np.concatenate((pos_data_points, neg_data_points), axis=0)

    rotation_matrix = get_transformation(angle=ANGLE_)
    pos_transformed = np.dot(pos_data_points[:, 0:2], rotation_matrix)
    neg_transformed = np.dot(neg_data_points[:, 0:2], rotation_matrix)
    X = np.concatenate((pos_transformed, neg_transformed), axis=0)
    # re-attach labels: the first half of X is positive, the second negative
    X_new = np.zeros((X.shape[0], 3))
    for idx_x_p, x_p in enumerate(X):
        if idx_x_p < pos_transformed.shape[0]:
            X_new[idx_x_p] = np.append(x_p, 1)
        else:
            X_new[idx_x_p] = np.append(x_p, -1)

    # mean log-cosh loss: (1/n) * sum_i log(cosh(w^T x_i - y_i))
    for idx, data_vec in enumerate(X_new):
        tmp_loss += math.log(
            math.cosh(np.dot(np.transpose(w), data_vec[0:-1]) - data_vec[-1]))
    return 1 / float(X.shape[0]) * tmp_loss
def log_loss(w):
    """Log loss, aka logistic loss or cross-entropy loss.

    Adapted from sklearn.metrics.log_loss: here the only argument is the
    weight vector ``w``, and the data, labels, and options documented
    below are fixed inside the function so that a numerical Hessian can
    be taken with respect to ``w``.

    This is the loss function used in (multinomial) logistic regression
    and extensions of it such as neural networks, defined as the negative
    log-likelihood of the true labels given a probabilistic classifier's
    predictions. The log loss is only defined for two or more labels.
    For a single sample with true label yt in {0,1} and
    estimated probability yp that yt = 1, the log loss is
        -log P(yt|yp) = -(yt log(yp) + (1 - yt) log(1 - yp))
    Read more in the :ref:`User Guide <log_loss>`.
    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels for n_samples samples.
    y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)
        Predicted probabilities, as returned by a classifier's
        predict_proba method. If ``y_pred.shape = (n_samples,)``
        the probabilities provided are assumed to be that of the
        positive class. The labels in ``y_pred`` are assumed to be
        ordered alphabetically, as done by
        :class:`preprocessing.LabelBinarizer`.
    eps : float
        Log loss is undefined for p=0 or p=1, so probabilities are
        clipped to max(eps, min(1 - eps, p)).
    normalize : bool, optional (default=True)
        If true, return the mean loss per sample.
        Otherwise, return the sum of the per-sample losses.
    sample_weight : array-like of shape = [n_samples], optional
        Sample weights.
    labels : array-like, optional (default=None)
        If not provided, labels will be inferred from y_true. If ``labels``
        is ``None`` and ``y_pred`` has shape (n_samples,) the labels are
        assumed to be binary and are inferred from ``y_true``.
        .. versionadded:: 0.18
    Returns
    -------
    loss : float
    Examples
    --------
    >>> log_loss(["spam", "ham", "ham", "spam"],  # doctest: +ELLIPSIS
    ...          [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])
    0.21616...
    References
    ----------
    C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,
    p. 209.
    Notes
    -----
    The logarithm used is the natural logarithm (base-e).
    """
    # hard-code the original keyword arguments so the loss is a pure
    # function of w (the "hack" that makes the Hessian computation work)
    eps = 1e-15
    normalize = True
    sample_weight = None
    labels = None
    # regenerate the fixed dataset and define the labels here
    np.random.seed(seed=SEED_)
    pos_data_points, neg_data_points = rand_point_generator(point_num=50)
    #==================================================================
    rotation_matrix = get_transformation(angle=ANGLE_)
    pos_transformed = np.dot(pos_data_points[:, 0:2], rotation_matrix)
    neg_transformed = np.dot(neg_data_points[:, 0:2], rotation_matrix)
    #==================================================================
    dataset = np.concatenate((pos_data_points, neg_data_points), axis=0)
    X = np.concatenate((pos_transformed, neg_transformed), axis=0)
    #X = dataset[:, 0:2]
    y_true = dataset[:, -1]
    # earlier two-point toy dataset, kept for reference:
    # X = np.array([[0, 2], [2, 0]])
    # y_true = np.array([1, 0])
    # y_true = np.array([1, -1])
    # raw scores w^T x_i; these get clipped below instead of being passed
    # through a sigmoid, as part of the hack noted above
    y_pred = np.zeros(X.shape[0])
    for i in range(X.shape[0]):
        y_pred[i] = np.dot(np.transpose(w), X[i])

    y_pred = check_array(y_pred, ensure_2d=False)
    check_consistent_length(y_pred, y_true)

    lb = LabelBinarizer()

    if labels is not None:
        lb.fit(labels)
    else:
        lb.fit(y_true)

    if len(lb.classes_) == 1:
        if labels is None:
            raise ValueError('y_true contains only one label ({0}). Please '
                             'provide the true labels explicitly through the '
                             'labels argument.'.format(lb.classes_[0]))
        else:
            raise ValueError('The labels array needs to contain at least two '
                             'labels for log_loss, '
                             'got {0}.'.format(lb.classes_))

    transformed_labels = lb.transform(y_true)

    if transformed_labels.shape[1] == 1:
        transformed_labels = np.append(1 - transformed_labels,
                                       transformed_labels,
                                       axis=1)

    # Clipping
    y_pred = np.clip(y_pred, eps, 1 - eps)

    # If y_pred is of single dimension, assume y_true to be binary
    # and then check.
    if y_pred.ndim == 1:
        y_pred = y_pred[:, np.newaxis]
    if y_pred.shape[1] == 1:
        y_pred = np.append(1 - y_pred, y_pred, axis=1)

    # Check if dimensions are consistent.
    transformed_labels = check_array(transformed_labels)
    if len(lb.classes_) != y_pred.shape[1]:
        if labels is None:
            raise ValueError("y_true and y_pred contain different number of "
                             "classes {0}, {1}. Please provide the true "
                             "labels explicitly through the labels argument. "
                             "Classes found in "
                             "y_true: {2}".format(transformed_labels.shape[1],
                                                  y_pred.shape[1],
                                                  lb.classes_))
        else:
            raise ValueError('The number of classes in labels is different '
                             'from that in y_pred. Classes found in '
                             'labels: {0}'.format(lb.classes_))

    # Renormalize
    y_pred /= y_pred.sum(axis=1)[:, np.newaxis]
    loss = -(transformed_labels * np.log(y_pred)).sum(axis=1)

    return _weighted_sum(loss, sample_weight, normalize)
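# With the helpers in scope, the adapted loss can be handed straight to a
# numerical differentiator, mirroring what the main block below does for
# logcosh; the evaluation point here is arbitrary:
import numdifftools as nd

w0 = [1.0, -0.5]               # arbitrary evaluation point
print(log_loss(np.array(w0)))  # scalar loss value
H = nd.Hessian(log_loss)(w0)   # numerical Hessian with respect to w
print(np.linalg.eig(H)[0])     # its eigenvalues, i.e. the curvatures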
Example #9


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-m',
                        '--mode',
                        help='mode of this program',
                        required=True,
                        dest='mode')
    argument = parser.parse_args()
    mode = argument.mode

    # get the random dataset:
    np.random.seed(seed=SEED_)
    pos_data_points, neg_data_points = rand_point_generator(point_num=50)
    dataset = np.concatenate((pos_data_points, neg_data_points), axis=0)
    X = dataset[:, 0:2]
    y = dataset[:, -1]
    rotation_matrix = get_transformation(angle=ANGLE_)
    X_transformed = np.dot(X, rotation_matrix)

    # run some quick tests when in debug mode
    if mode == "debug":
        w, w_fake = find_hyperplane_vector(angle=2 * math.pi / 8)
        w_2, w_2_fake = find_hyperplane_vector(angle=math.pi / 3)
        # numerical Hessian of the log-cosh loss at w (nd = numdifftools),
        # then the eigenvalues of that Hessian, i.e. the curvatures
        H = nd.Hessian(logcosh)([float(w[0]), float(w[1])])
        print(np.linalg.eig(H)[0])
        interval_for_plot = []
        for i in range(-2, 3):
            interval_for_plot.append([i, 0])