Example #1
def root_mean_square_error(y_real, y_pred):
    """
    Compute the root mean squared error (RMSE) between the
    predicted and the actual ratings for users.

    Parameters
    ----------
    y_real : array-like
        The real ratings.

    y_pred : array-like
        The predicted ratings.

    Returns
    -------
    rmse : float
        Positive floating point value; the best value is 0.0.
    """
    y_real, y_pred = check_arrays(y_real, y_pred)

    rmse = np.sqrt(np.sum((y_pred - y_real) ** 2) / y_real.shape[0])
    return rmse
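For a quick sanity check of the formula, the same quantity can be computed with plain NumPy (the ratings below are hypothetical, and np.mean replaces the explicit sum-and-divide):

import numpy as np

# Hypothetical ratings, for illustration only.
y_real = np.array([4.0, 3.0, 5.0, 2.0])
y_pred = np.array([3.5, 3.0, 4.0, 2.5])

# Equivalent to np.sqrt(np.sum((y_pred - y_real) ** 2) / y_real.shape[0])
rmse = np.sqrt(np.mean((y_pred - y_real) ** 2))
print(rmse)  # ~0.61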
Example #2
def mean_absolute_error(y_real, y_pred):
    """
    Compute the mean absolute error (MAE) between the
    predicted and the actual ratings for users.

    Parameters
    ----------
    y_real : array-like
        The real ratings.

    y_pred : array-like
        The predicted ratings.

    Returns
    -------
    mae : float
        Positive floating point value; the best value is 0.0.
    """
    y_real, y_pred = check_arrays(y_real, y_pred)

    mae = np.sum(np.abs(y_pred - y_real)) / y_real.size
    return mae
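Since np.sum(...) / y_real.size is just the arithmetic mean, np.mean gives the same value; a minimal check with hypothetical ratings:

import numpy as np

y_real = np.array([4.0, 3.0, 5.0, 2.0])  # hypothetical values
y_pred = np.array([3.5, 3.0, 4.0, 2.5])

mae = np.mean(np.abs(y_pred - y_real))   # same as np.sum(...) / y_real.size
print(mae)  # 0.5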
Example #3
def normalized_mean_absolute_error(y_real, y_pred, max_rating, min_rating):
    """
    It computes the normalized average absolute difference (NMAE)
    between predicted and actual ratings for users.

    Parameters
    ----------
    y_real : array-like
        The real ratings.

    y_pred : array-like
        The predicted ratings.

    max_rating:
        The maximum rating of the model.

    min_rating:
        The minimum rating of the model.

    Returns
    -------

    Positive floating point value: the best value is 0.0.

    return the normalized mean absolute error


    """
    y_real, y_pred = check_arrays(y_real, y_pred)
    mae = mean_absolute_error(y_real, y_pred)
    return mae / (max_rating - min_rating)
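NMAE rescales the MAE by the rating range so that errors on different rating scales become comparable. A minimal sketch, assuming a 1-to-5 rating scale and hypothetical values:

import numpy as np

y_real = np.array([4.0, 3.0, 5.0, 2.0])  # hypothetical ratings on a 1-5 scale
y_pred = np.array([3.5, 3.0, 4.0, 2.5])

mae = np.mean(np.abs(y_pred - y_real))   # 0.5
nmae = mae / (5.0 - 1.0)                 # max_rating=5, min_rating=1
print(nmae)  # 0.125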
Example #4
def lcss_dist(X, Y, delta, epsilon):
    """Compute the LCSS distance between X and Y using Dynamic Programming.

    :param X (array): time series feature array denoted by X
    :param Y (array): time series feature array denoted by Y
    :param delta (int): time sample matching threshold
    :param epsilon (float): amplitude matching threshold
    :returns: distance between X and Y with the best alignment
    :Reference: M Vlachos et al., "Discovering Similar Multidimensional Trajectories", 2002
    """
    X, Y = check_arrays(X, Y)
    dist = _lcss_dist(X, Y, delta, epsilon)
    return dist
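_lcss_dist itself is not shown here; the following self-contained sketch spells out the classic LCSS recurrence from Vlachos et al. for 1-D sequences, using the common normalisation distance = 1 - LCSS / min(len(X), len(Y)). The helper name lcss_dist_sketch and the exact normalisation are assumptions, not necessarily what _lcss_dist does:

import numpy as np

def lcss_dist_sketch(x, y, delta, epsilon):
    """Sketch of the LCSS distance for 1-D sequences (Vlachos et al., 2002)."""
    n, m = len(x), len(y)
    # lcss[i, j] = length of the LCSS of x[:i] and y[:j]
    lcss = np.zeros((n + 1, m + 1), dtype=int)
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            if abs(x[i - 1] - y[j - 1]) < epsilon and abs(i - j) <= delta:
                lcss[i, j] = lcss[i - 1, j - 1] + 1
            else:
                lcss[i, j] = max(lcss[i - 1, j], lcss[i, j - 1])
    # distance = 1 - similarity, normalised by the shorter series
    return 1.0 - lcss[n, m] / min(n, m)

print(lcss_dist_sketch([1.0, 2.0, 3.0, 4.0], [1.1, 2.1, 2.9, 4.2], delta=1, epsilon=0.5))  # 0.0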
Example #5
def edr_dist(X, Y, epsilon):
    """Compute the EDR distance between X and Y using Dynamic Programming.

    :param X (array): time series feature array denoted by X
    :param Y (array): time series feature array denoted by Y
    :param epsilon (float): matching threshold
    :returns: distance between X and Y with the best alignment
    :Reference: L. Chen et al., "Robust and Fast Similarity Search for Moving Object Trajectories", 2005.
    """
    X, Y = check_arrays(X, Y)
    X = standardization(X)
    Y = standardization(Y)
    dist = _edr_dist(X, Y, epsilon)
    return dist
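The standardization helper is not defined in this listing; a plausible minimal stand-in is per-feature z-normalisation (zero mean, unit variance), which the EDR paper assumes before applying the epsilon threshold. standardization_sketch below is hypothetical:

import numpy as np

def standardization_sketch(X):
    """Hypothetical stand-in for the standardization helper used above:
    z-normalise each feature row to zero mean and unit variance."""
    X = np.asarray(X, dtype=float)
    return (X - X.mean(axis=-1, keepdims=True)) / X.std(axis=-1, keepdims=True)

print(standardization_sketch([[1.0, 2.0, 3.0, 4.0]]))  # [[-1.34, -0.45, 0.45, 1.34]]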
Example #6
def dtw_dist(X, Y, w=_np.inf, mode="dependent"):
    """Compute multidimensional DTW distance between X and Y using Dynamic Programming.

    :param X (array): time series feature array denoted by X
    :param Y (array): time series feature array denoted by Y
    :param w (int): window size (default=Inf)
    :param mode (string): "dependent" or "independent" (default="dependent")
    :returns: distance between X and Y with the best alignment
    :Reference: https://www.cs.unm.edu/~mueen/DTW.pdf
    """
    X, Y = check_arrays(X, Y)
    if mode == "dependent":
        dist = _dtw_dist(X, Y, w)
    elif mode == "independent":
        n_feature = X.shape[0]
        dist = 0
        for i in range(n_feature):
            dist += _dtw_dist(X[[i], :], Y[[i], :], w)
    else:
        raise ValueError(
            "The mode must be either \"dependent\" or \"independent\".")
    return dist
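_dtw_dist is likewise not shown; the sketch below gives a self-contained 1-D DTW with a Sakoe-Chiba band of half-width w and an absolute-difference local cost. Both the function name and the local cost are assumptions; in the "independent" mode above, each feature row would be aligned separately with such a recurrence and the per-feature distances summed:

import numpy as np

def dtw_dist_sketch(x, y, w=np.inf):
    """Sketch of 1-D DTW with a Sakoe-Chiba band of half-width w."""
    n, m = len(x), len(y)
    w = max(w, abs(n - m))                 # the band must at least cover the diagonal
    cost = np.full((n + 1, m + 1), np.inf)
    cost[0, 0] = 0.0
    for i in range(1, n + 1):
        j_lo = int(max(1, i - w))
        j_hi = int(min(m, i + w))
        for j in range(j_lo, j_hi + 1):
            d = abs(x[i - 1] - y[j - 1])   # local cost
            cost[i, j] = d + min(cost[i - 1, j], cost[i, j - 1], cost[i - 1, j - 1])
    return cost[n, m]

print(dtw_dist_sketch([1.0, 2.0, 3.0], [1.0, 2.0, 2.0, 3.0], w=1))  # 0.0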
Example #7
def root_mean_square_error(y_real, y_pred):
    """
    It computes the root mean squared difference (RMSE)
    between predicted and actual ratings for users.

    Parameters
    ----------
    y_real : array-like

    y_pred : array-like

    Returns
    -------

    Positive floating point value: the best value is 0.0.

    return the mean square error

    """
    y_real, y_pred = check_arrays(y_real, y_pred)

    return np.sqrt((np.sum((y_pred - y_real) ** 2)) / y_real.shape[0])
Example #8
def mean_absolute_error(y_real, y_pred):
    """
    It computes the average absolute difference (MAE)
    between predicted and actual ratings for users.

    Parameters
    ----------
    y_real : array-like

    y_pred : array-like

    Returns
    -------

    Positive floating point value: the best value is 0.0.

    return the mean absolute error


    """
    y_real, y_pred = check_arrays(y_real, y_pred)

    return np.sum(np.abs(y_pred - y_real)) / y_real.size if y_real.size != 0 else None
Example #9
def normalized_mean_absolute_error(y_real, y_pred, max_rating, min_rating):
    """
    Compute the normalized mean absolute error (NMAE) between the
    predicted and the actual ratings for users.

    Parameters
    ----------
    y_real : array-like
        The real ratings.

    y_pred : array-like
        The predicted ratings.

    max_rating : float
        The maximum rating of the model.

    min_rating : float
        The minimum rating of the model.

    Returns
    -------
    nmae : float
        Positive floating point value; the best value is 0.0.
    """
    y_real, y_pred = check_arrays(y_real, y_pred)
    mae = mean_absolute_error(y_real, y_pred)

    return mae / (max_rating - min_rating)
Example #10
def precision_recall_fscore(y_real, y_pred, beta=1.0):
    """Compute per-user precision, recall and F-measure
    for recommender systems.

    The precision is the ratio :math:`tp / (tp + fp)` where tp is the number of
    true positives and fp the number of false positives. In recommender systems
    ...

    The recall is the ratio :math:`tp / (tp + fn)` where tp is the number of
    true positives and fn the number of false negatives. In recommender
    systems...

    The F_beta score can be interpreted as a weighted harmonic mean of
    the precision and recall, where an F_beta score reaches its best
    value at 1 and worst score at 0.

    The F_beta score weights recall beta times as much as precision;
    beta = 1.0 means recall and precision are equally important.

    Parameters
    ----------
    y_real : array, shape = [n_samples]
        true recommended items

    y_pred : array, shape = [n_samples]
        predicted recommended items

    beta : float, 1.0 by default
        the strength of recall versus precision in the f-score

    Returns
    -------
    precision: array, shape = [n_samples], dtype = np.double
    recall: array, shape = [n_samples], dtype = np.double
    f1_score: array, shape = [n_samples], dtype = np.double

    References
    ----------
    http://en.wikipedia.org/wiki/Precision_and_recall

    """
    y_real, y_pred = check_arrays(y_real, y_pred)
    assert(beta > 0)

    n_users = y_real.shape[0]
    precision = np.zeros(n_users, dtype=np.double)
    recall = np.zeros(n_users, dtype=np.double)
    fscore = np.zeros(n_users, dtype=np.double)

    try:
        # oddly, we may get an "invalid" rather than a "divide" error here
        old_err_settings = np.seterr(divide='ignore', invalid='ignore')

        for i, y_items_pred in enumerate(y_pred):
            intersection_size = np.intersect1d(y_items_pred, y_real[i]).size
            # precision = tp / (tp + fp): fraction of predicted items that are relevant
            precision[i] = (intersection_size / float(len(y_items_pred))) \
                                    if len(y_items_pred) else 0.0
            # recall = tp / (tp + fn): fraction of relevant items that were predicted
            recall[i] = (intersection_size / float(len(y_real[i]))) \
                                    if len(y_real[i]) else 0.0

        # handle division by 0.0 in precision and recall
        precision[np.isnan(precision)] = 0.0
        recall[np.isnan(recall)] = 0.0

        #fbeta Score
        beta2 = beta ** 2
        fscore = (1 + beta2) * (precision * recall) \
                    / (beta2 * precision + recall)

        #handle division by 0.0 in fscore
        fscore[(precision + recall) == 0.0] = 0.0

    finally:
        np.seterr(**old_err_settings)

    return precision, recall, fscore
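A worked example of the per-user arithmetic on hypothetical recommendation lists, with precision taken over the predicted list and recall over the true list, as in the docstring's definitions:

import numpy as np

y_real_user = np.array([1, 2, 3, 4])        # items the user actually liked (hypothetical)
y_pred_user = np.array([2, 3, 5])           # items the recommender returned (hypothetical)

tp = np.intersect1d(y_pred_user, y_real_user).size    # 2 (items 2 and 3)
precision = tp / float(len(y_pred_user))               # 2/3
recall = tp / float(len(y_real_user))                  # 2/4
f1 = 2 * precision * recall / (precision + recall)     # ~0.571 (beta = 1)
print(precision, recall, f1)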
Example #11
def smacof(similarities, metric=True, n_components=2, init=None, n_init=8,
           n_jobs=1, max_iter=300, verbose=0, eps=1e-3, random_state=None):
    """
    Computes multidimensional scaling using the SMACOF (Scaling by Majorizing a
    Complicated Function) algorithm.

    The SMACOF algorithm is a multidimensional scaling algorithm: it minimizes
    an objective function, the *stress*, using a majorization technique. The
    Stress Majorization, also known as the Guttman Transform, guarantees a
    monotone convergence of the stress, and is more powerful than traditional
    techniques such as gradient descent.

    The SMACOF algorithm for metric MDS can be summarized by the following steps:

    1. Set an initial start configuration, randomly or not.
    2. Compute the stress.
    3. Compute the Guttman Transform.
    4. Iterate 2 and 3 until convergence.

    The nonmetric algorithm adds a monotonic regression step before computing
    the stress.

    Parameters
    ----------
    similarities : symmetric ndarray, shape (n_samples, n_samples)
        similarities between the points

    metric : boolean, optional, default: True
        compute metric or nonmetric SMACOF algorithm

    n_components : int, optional, default: 2
        number of dimensions in which to immerse the similarities;
        overridden if an initial array is provided.

    init : {None or ndarray of shape (n_samples, n_components)}, optional
        if None, randomly chooses the initial configuration
        if ndarray, initialize the SMACOF algorithm with this array

    n_init : int, optional, default: 8
        Number of times the SMACOF algorithm will be run with different
        initialisations. The final result will be the best output of the
        n_init consecutive runs in terms of stress.

    n_jobs : int, optional, default: 1
        The number of jobs to use for the computation. This works by breaking
        down the pairwise matrix into n_jobs even slices and computing them in
        parallel.

        If -1 all CPUs are used. If 1 is given, no parallel computing code is
        used at all, which is useful for debugging. For n_jobs below -1,
        (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one
        are used.

    max_iter : int, optional, default: 300
        Maximum number of iterations of the SMACOF algorithm for a single run

    verbose : int, optional, default: 0
        level of verbosity

    eps : float, optional, default: 1e-3
        relative tolerance w.r.t. stress to declare convergence

    random_state : integer or numpy.RandomState, optional
        The generator used to initialize the centers. If an integer is
        given, it fixes the seed. Defaults to the global numpy random
        number generator.

    Returns
    -------
    X : ndarray, shape (n_samples, n_components)
        Coordinates of the n_samples points in an n_components-dimensional space

    stress : float
        The final value of the stress (sum of squared differences between the
        disparities and the distances over all constrained points)

    Notes
    -----
    "Modern Multidimensional Scaling - Theory and Applications" Borg, I.;
    Groenen P. Springer Series in Statistics (1997)

    "Nonmetric multidimensional scaling: a numerical method" Kruskal, J.
    Psychometrika, 29 (1964)

    "Multidimensional scaling by optimizing goodness of fit to a nonmetric
    hypothesis" Kruskal, J. Psychometrika, 29, (1964)
    """

    similarities, = check_arrays(similarities, sparse_format='dense')
    random_state = check_random_state(random_state)

    if hasattr(init, '__array__'):
        init = np.asarray(init).copy()
        if not n_init == 1:
            warnings.warn(
                'Explicit initial positions passed: '
                'performing only one init of the MDS instead of %d'
                % n_init)
            n_init = 1

    best_pos, best_stress = None, None

    if n_jobs == 1:
        for it in range(n_init):
            pos, stress = _smacof_single(similarities, metric=metric,
                                         n_components=n_components, init=init,
                                         max_iter=max_iter, verbose=verbose,
                                         eps=eps, random_state=random_state)
            if best_stress is None or stress < best_stress:
                best_stress = stress
                best_pos = pos.copy()
    else:
        seeds = random_state.randint(np.iinfo(np.int32).max, size=n_init)
        results = Parallel(n_jobs=n_jobs, verbose=max(verbose - 1, 0))(
            delayed(_smacof_single)(
                similarities, metric=metric, n_components=n_components,
                init=init, max_iter=max_iter, verbose=verbose, eps=eps,
                random_state=seed)
            for seed in seeds)
        positions, stress = zip(*results)
        best = np.argmin(stress)
        best_stress = stress[best]
        best_pos = positions[best]
    return best_pos, best_stress
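The stress returned above is the sum of squared differences between the embedded pairwise distances and the target dissimilarities. The sketch below computes this raw (un-normalised) stress for a given configuration; it illustrates the quantity, and is not necessarily the exact value _smacof_single reports:

import numpy as np

def raw_stress_sketch(X, similarities):
    """Sum of squared differences between the pairwise distances of the
    configuration X and the target dissimilarity matrix."""
    diff = X[:, np.newaxis, :] - X[np.newaxis, :, :]
    dist = np.sqrt((diff ** 2).sum(axis=-1))          # pairwise Euclidean distances
    return ((dist - similarities) ** 2).sum() / 2.0   # each pair counted once

X = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]])   # hypothetical 2-D configuration
D = np.array([[0.0, 1.0, 1.0],
              [1.0, 0.0, 1.5],
              [1.0, 1.5, 0.0]])                       # target dissimilarities
print(raw_stress_sketch(X, D))                        # ~0.0074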