Exemplo n.º 1
0
def matthews_score(y_true, y_pred, sample_weight=None, prob=0.5, pos_label=1):
    """Compute the Matthews correlation coefficient (MCC) for a binary problem.

    The MCC folds all four cells of the confusion matrix into a single value
    between -1 and +1. A coefficient of +1 represents a perfect prediction,
    0 an average random prediction and -1 an inverse prediction.
    The statistic is also known as the phi coefficient.

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: pd.Series or array or list, predictions as returned by a classifier.
        sample_weight: list or array or dict of sample weight.
        prob: probability threshold, forwarded to `classified_func`.
        pos_label: positive label.
    Returns:
        The weighted Matthews correlation coefficient. When any marginal of
        the confusion matrix is empty the coefficient is undefined (0/0) and
        0.0 is returned instead of NaN.
    """
    sample_weight = _sample_weight(y_true, sample_weight)
    # NOTE(review): `classified_func` lives elsewhere in the module; this code
    # relies on it returning a frame whose `label` column holds the true labels
    # and whose `prob` column holds the predicted labels -- confirm.
    t = classified_func(y_true, y_pred, prob=prob, pos_label=pos_label)
    t['weight'] = sample_weight

    actual_pos = t.label == pos_label
    predicted_pos = t.prob == pos_label
    tp = ((actual_pos & predicted_pos) * t.weight).sum()
    fn = ((actual_pos & ~predicted_pos) * t.weight).sum()
    fp = ((~actual_pos & predicted_pos) * t.weight).sum()
    tn = ((~actual_pos & ~predicted_pos) * t.weight).sum()

    denominator = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    if denominator == 0:
        # One class is never observed or never predicted: avoid 0/0.
        return 0.0
    return (tp * tn - fp * fn) / denominator
Exemplo n.º 2
0
def auc_roc(y_true, y_pred, sample_weight=None, pos_label=1):
    """Area Under the Receiver Operating Characteristic Curve (ROC AUC).

    The value is computed as the weighted probability that a positive sample
    receives a higher score than a negative sample; a tied pair counts as
    half. With unit weights and no tied scores this equals the rank-sum
    (Mann-Whitney) formula.

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: pd.Series or array or list, prediction scores, as returned by a classifier.
        sample_weight: list or array or dict of sample weight.
        pos_label: positive label.
    Returns:
        Area Under the Receiver Operating Characteristic Curve (ROC AUC) from prediction scores.
    Raises:
        AssertionError: if `y_true` does not contain exactly two distinct labels.
    """
    sample_weight = _sample_weight(y_true, sample_weight)
    t = pd.DataFrame({'prob': y_pred, 'label': y_true, 'weight': sample_weight})
    assert t.label.nunique()==2, "`y_true` should be binary classification."

    is_pos = t.label == pos_label
    # Total positive / negative weight at each distinct score, ascending.
    per_score = pd.DataFrame({
        'prob': t.prob,
        'pos': t.weight.where(is_pos, 0),
        'neg': t.weight.where(~is_pos, 0),
    }).groupby('prob', sort=True)[['pos', 'neg']].sum()

    # Negative weight strictly below each score, plus half of the tied weight.
    neg_below = per_score['neg'].cumsum() - per_score['neg']
    concordant = (per_score['pos'] * (neg_below + per_score['neg'] / 2)).sum()
    auc = concordant / (per_score['pos'].sum() * per_score['neg'].sum())
    return auc
Exemplo n.º 3
0
def crossentropy_categorical(y_true, y_pred, sample_weight=None, one_hot=False):
    """Computes the crossentropy metric between the labels and predictions.

    A softmax is applied to every row of `y_pred`, and the negative
    log-probability assigned to the true class is averaged over the samples.
    eg., When labels values are [2, 0, 1], the encoded form is
    y_true = [[0, 0, 1], [1, 0, 0], [0, 1, 0]].

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: array or list of shape (n_samples, n_classes), per-class scores as returned by a classifier.
        sample_weight: list or array or dict of sample weight.
        one_hot: default False. If True, `y_true` holds class labels and is
            one-hot encoded here with `pd.get_dummies`; if False, `y_true` is
            used as given and must already be a one-hot matrix.
    Returns:
        categorical crossentropy averaged over the samples.
    Raises:
        AssertionError: if the number of classes in `y_true` does not match `y_pred.shape[1]`.
    """
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    sample_weight = _sample_weight(y_true, sample_weight)

    if not one_hot and y_true.ndim == 2:
        # An already-encoded 0/1 matrix only contains two distinct values, so
        # the class count has to come from its width, not from np.unique.
        n_classes = y_true.shape[1]
    else:
        n_classes = np.unique(y_true).size
    assert y_pred.shape[1]==n_classes, "`y_pred` and `y_true` dim not same."

    # Subtract the row maximum before exponentiating to keep exp() finite.
    shifted = np.exp(y_pred.T - np.max(y_pred, axis=1))
    log_softmax = np.log(shifted / np.sum(shifted, axis=0)).T
    targets = pd.get_dummies(y_true) if one_hot else y_true
    return -((log_softmax * targets).sum(axis=1) * sample_weight).mean()
Exemplo n.º 4
0
def iou_categorical(y_true, y_pred, sample_weight=None, target_class_ids=None):
    """Computes mean Intersection-Over-Union metric for one-hot encoded or categorical labels.

    Inputs with more than one dimension are reduced to class ids with an
    argmax over the last axis before the per-class IoU is computed.

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: pd.Series or array or list, predicted labels, as returned by a classifier.
        sample_weight: list or array or dict of sample weight.
        target_class_ids: A tuple or list of target class ids for which the metric is returned.
            Defaults to every class that occurs in `y_true`.
    Returns:
        dict with two entries: 'IoU_class' maps each target class id to its
        IoU (0 for target classes that never occur in `y_true`), and
        'IoU_mean' is the mean IoU over the target classes that do occur in
        `y_true` (0.0 when none of them occurs).
    """
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    if y_true.ndim != 1:
        y_true = np.argmax(y_true, axis=-1)
    if y_pred.ndim != 1:
        y_pred = np.argmax(y_pred, axis=-1)
    sample_weight = _sample_weight(y_true, sample_weight)
    t = pd.DataFrame({'prob': y_pred.flatten(), 'label': y_true.flatten(), 'weight': sample_weight})
    if target_class_ids is None:
        target_class_ids = t.label.unique().tolist()

    iou_by_class = {i: 0 for i in target_class_ids}
    observed_scores = []
    for i in t.label.unique():
        if i not in target_class_ids:
            continue
        is_label = t.label == i
        is_pred = t.prob == i
        intersection = ((is_label & is_pred) * t.weight).sum()
        missed = ((is_label & ~is_pred) * t.weight).sum()
        spurious = ((~is_label & is_pred) * t.weight).sum()
        iou = intersection / (intersection + missed + spurious)
        observed_scores.append(iou)
        iou_by_class[i] = iou

    # Guard against an empty selection instead of raising ZeroDivisionError.
    iou_mean = sum(observed_scores) / len(observed_scores) if observed_scores else 0.0
    return {'IoU_mean': iou_mean, 'IoU_class': iou_by_class}
Exemplo n.º 5
0
def levenshtein(x, y, normalize=False, sample_weight=None):
    """levenshtein distance.

    Each pair (x[i], y[i]) is compared with the classic two-row dynamic
    programming edit distance; the per-pair distances are multiplied by their
    sample weight and summed.

    Args:
        x: pd.Series or array or list of sequences (e.g. strings).
        y: pd.Series or array or list of sequences (e.g. strings).
        normalize: bool, default False, if normalize is True, levenshtein distance should be distance/max(len(x), len(y)).
        sample_weight: list or array of sample weight.
    Returns:
        levenshtein distance value.
    """
    def edit_distance(s1, s2):
        # Keep the shorter sequence in the inner loop so the rows stay small.
        if len(s1) < len(s2):
            s1, s2 = s2, s1
        if not s2:
            return len(s1)

        previous = list(range(len(s2) + 1))
        for i, c1 in enumerate(s1):
            current = [i + 1]
            for j, c2 in enumerate(s2):
                current.append(min(previous[j + 1] + 1,      # deletion
                                   current[j] + 1,           # insertion
                                   previous[j] + (c1 != c2)))  # substitution
            previous = current
        return previous[-1]

    def weighted_distance(s1, s2, weight):
        distance = edit_distance(s1, s2)
        if normalize:
            longest = max(len(s1), len(s2))
            # Two empty sequences are identical; avoid 0/0.
            distance = distance / longest if longest else 0.0
        return distance * weight

    sample_weight = _sample_weight(x, sample_weight)
    pairs = pd.DataFrame({'label1': x, 'label2': y, 'weight': sample_weight})
    # Access the row by column label: positional access on a labelled Series
    # is deprecated in recent pandas.
    return pairs.apply(
        lambda row: weighted_distance(row['label1'], row['label2'], row['weight']),
        axis=1,
    ).sum()
Exemplo n.º 6
0
def normal_loss(y_true, y_pred, k, log=False, root=False, sample_weight=None):
    """Mean k-th power error regression loss.

    The absolute error of every sample is multiplied by its weight, raised to
    the power `k` and averaged.

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: pd.Series or array or list, predicted values, as returned by a regression.
        k: int, exponent applied to the weighted absolute error.
        log: default False, whether to compare log1p(y_true) with log1p(y_pred) instead of the raw values.
        root: default False, whether to take the k-th root of the mean (with k=2 this yields an rmse-style loss).
        sample_weight: list or array of sample weight.
    Returns:
        regression loss values.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    sample_weight = _sample_weight(actual, sample_weight)

    if log:
        residual = np.log1p(actual) - np.log1p(predicted)
    else:
        residual = actual - predicted

    loss = np.power(np.abs(residual) * sample_weight, k).mean()
    return np.power(loss, 1 / k) if root else loss
Exemplo n.º 7
0
def mean_tweedie_deviance(y_true, y_pred, p, sample_weight=None):
    """Mean Tweedie deviance regression loss.

    when p=0 it is equivalent to mean_squared_error.
    when p=1 it is equivalent to mean_poisson_deviance.
    when p=2 it is equivalent to mean_gamma_deviance.

    p < 0: Extreme stable distribution. Requires: y_pred > 0.
    p = 0 : Normal distribution, output corresponds to mean_squared_error. y_true and y_pred can be any real numbers.
    p = 1 : Poisson distribution. Requires: y_true >= 0 and y_pred > 0.
    1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0 and y_pred > 0.
    p = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.
    p = 3 : Inverse Gaussian distribution. Requires: y_true > 0 and y_pred > 0.
    otherwise : Positive stable distribution. Requires: y_true > 0 and y_pred > 0.

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: pd.Series or array or list, predicted values, as returned by a regression.
        p: Tweedie power parameter. Either p <= 0 or p >= 1.
           The higher p the less weight is given to extreme deviations between true and predicted targets.
        sample_weight: list or array of sample weight.
    Returns:
        A non-negative floating point value (the best value is 0.0).
    Raises:
        ValueError: if 0 < p < 1, or if the inputs violate the domain required for `p`.
    """
    # Work in floating point: NumPy refuses to raise integer arrays to the
    # negative powers used by the general formula below.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    if p < 0:
        if (y_pred <= 0).any():
            raise ValueError(
                "p < 0: Extreme stable distribution. Requires: y_pred > 0.")
    elif p == 0:
        pass
    elif 0 < p < 1:
        raise ValueError("Tweedie deviance is only defined for p<=0 and p>=1.")
    elif 1 <= p < 2:
        if (y_true < 0).any() or (y_pred <= 0).any():
            raise ValueError(
                "1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0 and y_pred > 0."
            )
    else:
        if (y_true <= 0).any() or (y_pred <= 0).any():
            raise ValueError(
                "p>=2, Positive stable distribution. Requires: y_true > 0 and y_pred > 0."
            )

    if p == 0:
        t = np.square(y_true - y_pred)
    elif p == 1:
        # y*log(y/mu) tends to 0 as y -> 0; evaluate the log only where
        # y_true > 0 so zero counts contribute 0 instead of NaN.
        positive = y_true > 0
        safe_true = np.where(positive, y_true, 1.0)
        y_log_ratio = np.where(positive, y_true * np.log(safe_true / y_pred), 0.0)
        t = 2 * (y_log_ratio + y_pred - y_true)
    elif p == 2:
        t = 2 * (np.log(y_pred / y_true) + y_true / y_pred - 1)
    else:
        t = 2 * (np.power(np.maximum(y_true, 0), 2 - p) /
                 ((1 - p) * (2 - p)) - y_true * np.power(y_pred, 1 - p) /
                 (1 - p) + np.power(y_pred, 2 - p) / (2 - p))
    sample_weight = _sample_weight(y_true, sample_weight)
    return np.average(t, weights=sample_weight)
Exemplo n.º 8
0
def ochiia(x, y):
    """ochiia distance.

    Computed on the sets of distinct values of `x` and `y` as
    1 - |X & Y| / sqrt(|X| * |Y|).

    Args:
        x: pd.Series or array or list, sample n dim feature value.
        y: pd.Series or array or list, sample n dim feature value.
    Returns:
        ochiia distance value (0 when both inputs hold the same distinct values,
        1 when they share none).
    """
    # The original body read an undefined `sample_weight` local here, which
    # made every call fail; the metric itself does not use weights.
    distinct_x = set(x)
    distinct_y = set(y)
    shared = len(distinct_x & distinct_y)
    return 1 - shared / np.sqrt(len(distinct_x) * len(distinct_y))
Exemplo n.º 9
0
def hamming(x, y, sample_weight=None):
    """hamming distance.

    Weighted share of positions at which `x` and `y` differ: the weights of
    the mismatching positions divided by the total weight.

    Args:
        x: pd.Series or array or list, sample n dim feature value.
        y: pd.Series or array or list, sample n dim feature value.
        sample_weight: list or array of sample weight.
    Returns:
        hamming distance value.
    """
    sample_weight = _sample_weight(x, sample_weight)
    weighted_mismatch = sum(
        (left != right) * weight
        for left, right, weight in zip(x, y, sample_weight)
    )
    return weighted_mismatch / sample_weight.sum()
Exemplo n.º 10
0
def hellinger(x, y, sample_weight=None):
    """hellinger distance.

    Euclidean norm of the weighted difference between the element-wise square
    roots of `x` and `y`, scaled by 1/sqrt(2).

    Args:
        x: pd.Series or array or list, sample n dim feature value.
        y: pd.Series or array or list, sample n dim feature value.
        sample_weight: list or array of sample weight.
    Returns:
        hellinger distance value.
    """
    sample_weight = _sample_weight(x, sample_weight)
    root_gap = np.sqrt(x) - np.sqrt(y)
    scale = 1 / np.sqrt(2)
    return scale * np.linalg.norm(root_gap * sample_weight)
Exemplo n.º 11
0
def manhattan(x, y, sample_weight=None):
    """manhattan distance.

    Sum of the weighted absolute element-wise differences between `x` and `y`.

    Args:
        x: pd.Series or array or list, sample n dim feature value.
        y: pd.Series or array or list, sample n dim feature value.
        sample_weight: list or array of sample weight.
    Returns:
        manhattan distance value.
    """
    sample_weight = _sample_weight(x, sample_weight)
    gap = np.array(x) - np.array(y)
    return np.sum(np.abs(gap) * sample_weight)
Exemplo n.º 12
0
def chebyshev(x, y, sample_weight=None):
    """chebyshev distance.

    Largest weighted absolute element-wise difference between `x` and `y`.

    Args:
        x: pd.Series or array or list, sample n dim feature value.
        y: pd.Series or array or list, sample n dim feature value.
        sample_weight: list or array of sample weight.
    Returns:
        chebyshev distance value.
    """
    sample_weight = _sample_weight(x, sample_weight)
    # The absolute value is required: without it a large negative difference
    # is ignored and the result can even be negative.
    gap = np.abs(np.array(x) - np.array(y))
    return np.max(gap * sample_weight)
Exemplo n.º 13
0
def f1_score(y_true, y_pred, sample_weight=None, prob=0.5, pos_label=1):
    """Compute the F1 score by delegating to `fbeta_score` with beta=1.

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: pd.Series or array or list, predicted labels, as returned by a classifier.
        sample_weight: list or array or dict of sample weight.
        prob: probability threshold.
        pos_label: positive label.
    Returns:
        F1 score of the positive class in binary classification.
    """
    return fbeta_score(
        y_true,
        y_pred,
        beta=1,
        sample_weight=_sample_weight(y_true, sample_weight),
        prob=prob,
        pos_label=pos_label,
    )
Exemplo n.º 14
0
def minkowski(x, y, p, sample_weight=None):
    """minkowski distance.

    p-th root of the sum of the weighted absolute differences raised to the
    power `p`.

    Args:
        x: pd.Series or array or list, sample n dim feature value.
        y: pd.Series or array or list, sample n dim feature value.
        p: int, norm dimension.
        sample_weight: list or array of sample weight.
    Returns:
        minkowski distance value.
    """
    sample_weight = _sample_weight(x, sample_weight)
    weighted_gap = np.abs(np.array(x) - np.array(y)) * sample_weight
    total = np.sum(np.power(weighted_gap, p))
    return np.power(total, 1 / p)
Exemplo n.º 15
0
def poisson(y_true, y_pred, sample_weight=None):
    """Computes the Poisson loss between y_true and y_pred.

    The per-sample loss is y_pred - y_true * log(y_pred + 1e-7); the small
    constant keeps the logarithm finite when a prediction is exactly zero.

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: pd.Series or array or list, predicted values, as returned by a regression.
        sample_weight: list or array of sample weight.
    Returns:
        regression loss values.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    sample_weight = _sample_weight(actual, sample_weight)
    per_sample = predicted - actual * np.log(predicted + 1e-7)
    return np.mean(per_sample * sample_weight)
Exemplo n.º 16
0
def cosine(x, y, sample_weight=None):
    """cosine measure between two vectors.

    Computes the weighted dot product of `x` and `y` divided by the product
    of their (unweighted) Euclidean norms. Note that this is the cosine
    similarity form: identical directions give 1, not 0.

    Args:
        x: pd.Series or array or list, sample n dim feature value.
        y: pd.Series or array or list, sample n dim feature value.
        sample_weight: list or array of sample weight.
    Returns:
        cosine value.
    """
    x = np.array(x)
    y = np.array(y)
    sample_weight = _sample_weight(x, sample_weight)
    weighted_dot = (x * y * sample_weight).sum()
    norm_x = np.sqrt(np.square(x).sum())
    norm_y = np.sqrt(np.square(y).sum())
    return weighted_dot / norm_x / norm_y
Exemplo n.º 17
0
def hit_ratio(y_true, y_pred, k, sample_weight=None):
    """Hit Ratio k

    For every sample, counts how many of its ground-truth items appear among
    the first `k` predicted items, weights that count, and divides the total
    by the number of distinct ground-truth items over all samples.

    Args:
        y_true: pd.Series or array or list of item collections, ground truth (correct) labels.
        y_pred: pd.Series or array or list of ranked item sequences, as returned by a rank.
        k: int, top k predict values.
        sample_weight: list or array of sample weight.
    Returns:
        Hit Ratio k values.
    """
    sample_weight = _sample_weight(y_true, sample_weight)
    t = pd.DataFrame({'label1': y_true, 'label2': y_pred, 'weight': sample_weight})

    def weighted_hits(row):
        # Address the row by column label: positional access (`row[0]`) on a
        # labelled Series is deprecated in recent pandas.
        relevant = set(row['label1'])
        top_k = set(row['label2'][:k])
        return len(relevant.intersection(top_k)) * row['weight']

    total_relevant = t.label1.map(lambda items: len(set(items))).sum()
    return t.apply(weighted_hits, axis=1).sum() / total_relevant
Exemplo n.º 18
0
def explained_variance_score(y_true, y_pred, sample_weight=None):
    """explained variance regression score.

    One minus the ratio between the variance of the weighted residuals and
    the variance of `y_true`.

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: pd.Series or array or list, predicted values, as returned by a regression.
        sample_weight: list or array of sample weight.
    Returns:
        regression loss values.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    sample_weight = _sample_weight(actual, sample_weight)
    residual_variance = ((actual - predicted) * sample_weight).std() ** 2
    target_variance = actual.std() ** 2
    return 1 - residual_variance / target_variance
Exemplo n.º 19
0
def bhattacharyya(x, y, sample_weight=None):
    """bhattacharyya distance.

    Defined as -ln(BC), where BC is the (weighted) Bhattacharyya coefficient
    sum(sqrt(x * y * weight)).

    Args:
        x: pd.Series or array or list, sample n dim feature value.
        y: pd.Series or array or list, sample n dim feature value.
        sample_weight: list or array of sample weight.
    Returns:
        bhattacharyya distance value.
    """
    x = np.array(x)
    y = np.array(y)
    sample_weight = _sample_weight(x, sample_weight)
    coefficient = np.sum(np.sqrt(x * y * sample_weight))
    # The distance is the NEGATIVE log of the coefficient; without the sign
    # flip, overlapping distributions (BC < 1) produce negative "distances".
    return -np.log(coefficient)
Exemplo n.º 20
0
def braycurtis(x, y, sample_weight=None):
    """braycurtis distance.

    Sum of the weighted absolute differences divided by the combined
    (unweighted) totals of `x` and `y`.

    Args:
        x: pd.Series or array or list, sample n dim feature value.
        y: pd.Series or array or list, sample n dim feature value.
        sample_weight: list or array of sample weight.
    Returns:
        braycurtis distance value.
    """
    x = np.array(x)
    y = np.array(y)
    sample_weight = _sample_weight(x, sample_weight)
    weighted_gap = np.sum(np.abs(x - y) * sample_weight)
    combined_total = np.sum(x) + np.sum(y)
    return weighted_gap / combined_total
Exemplo n.º 21
0
def median_absolute_error(y_true, y_pred, sample_weight=None):
    """Median absolute error regression loss.

    Median of the absolute errors after each one has been multiplied by its
    sample weight.

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: pd.Series or array or list, predicted values, as returned by a regression.
        sample_weight: list or array of sample weight.
    Returns:
        regression loss values.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    sample_weight = _sample_weight(actual, sample_weight)
    weighted_abs_error = np.abs(actual - predicted) * sample_weight
    return np.median(weighted_abs_error)
Exemplo n.º 22
0
def accuracy_binary(y_true, y_pred, sample_weight=None, prob=0.5, pos_label=1):
    """Calculates how often predictions match binary labels.

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: pd.Series or array or list, predicted labels, as returned by a classifier.
        sample_weight: list or array or dict of sample weight.
        prob: probability threshold, forwarded to `classified_func`.
        pos_label: positive label.
    Returns:
        the weighted fraction of correctly classified samples (float).
    """
    sample_weight = _sample_weight(y_true, sample_weight)
    # NOTE(review): `classified_func` is defined elsewhere; its `label` and
    # `prob` columns are compared directly here -- confirm both hold labels.
    classified = classified_func(y_true, y_pred, prob=prob, pos_label=pos_label)
    is_correct = classified.label == classified.prob
    return (is_correct * sample_weight).mean()
Exemplo n.º 23
0
def r2_score(y_true, y_pred, sample_weight=None):
    """r2 (coefficient of determination) regression score.

    One minus the ratio of the weighted residual sum of squares to the
    weighted total sum of squares around the mean of `y_true`.

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: pd.Series or array or list, predicted values, as returned by a regression.
        sample_weight: list or array of sample weight.
    Returns:
        regression loss values.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    sample_weight = _sample_weight(actual, sample_weight)
    residual_ss = np.power((actual - predicted) * sample_weight, 2).sum()
    total_ss = np.power((actual - actual.mean()) * sample_weight, 2).sum()
    return 1 - residual_ss / total_ss
Exemplo n.º 24
0
def log_cosh_error(y_true, y_pred, sample_weight=None):
    """Computes the logarithm of the hyperbolic cosine of the prediction error.

    Uses the identity log(cosh(x)) = x + log(1 + exp(-2x)) - log(2), where x
    is the weighted prediction error, and averages it over the samples.

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: pd.Series or array or list, predicted values, as returned by a regression.
        sample_weight: list or array of sample weight.
    Returns:
        regression loss values.
    """
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    # Resolve the weights BEFORE using them; with the default
    # `sample_weight=None` the multiplication below would otherwise fail.
    sample_weight = _sample_weight(y_true, sample_weight)
    x = (y_pred - y_true) * sample_weight
    # logaddexp(0, -2x) == log(1 + exp(-2x)) without overflowing for large |x|.
    return np.mean(x + np.logaddexp(0., -2. * x) - np.log(2.))
Exemplo n.º 25
0
def precision(y_true, y_pred, sample_weight=None, prob=0.5, pos_label=1):
    """Computes the precision of the predictions with respect to the labels.

    Among the samples predicted as `pos_label`, averages the weighted
    indicator of the true label also being `pos_label`.

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: pd.Series or array or list, predicted labels, as returned by a classifier.
        sample_weight: list or array or dict of sample weight.
        prob: probability threshold, forwarded to `classified_func`.
        pos_label: positive label.
    Returns:
        Precision of the positive class in binary classification.
    """
    sample_weight = _sample_weight(y_true, sample_weight)
    # NOTE(review): `classified_func` is defined elsewhere; `label` is taken to
    # hold the true labels and `prob` the predicted labels -- confirm.
    t = classified_func(y_true, y_pred, prob=prob, pos_label=pos_label)
    # Compare against pos_label instead of averaging raw label values, so the
    # result is also correct when the positive label is not the number 1.
    is_true_positive = t.label == pos_label
    predicted_positive = t.prob == pos_label
    return (is_true_positive * sample_weight)[predicted_positive].mean()
Exemplo n.º 26
0
def canberra(x, y, sample_weight=None):
    """canberra distance.

    Sum over all positions of weight * |x - y| / (|x| + |y|).

    Args:
        x: pd.Series or array or list, sample n dim feature value.
        y: pd.Series or array or list, sample n dim feature value.
        sample_weight: list or array of sample weight.
    Returns:
        canberra distance value.
    Raises:
        AssertionError: if `x` and `y` differ in length.
    """
    assert len(x)==len(y), 'x shape should be same with y.'
    x = pd.Series(x)
    y = pd.Series(y)
    sample_weight = _sample_weight(x, sample_weight)
    # The numerator must be the absolute difference; a signed difference lets
    # positive and negative terms cancel each other out.
    terms = (x - y).abs() * sample_weight / (x.abs() + y.abs())
    return terms.sum()
Exemplo n.º 27
0
def mean_relative_error(y_true, y_pred, normalizer, sample_weight=None):
    """Computes the mean relative error by normalizing with the given values.

    Each absolute error is divided by the matching `normalizer` entry,
    multiplied by its sample weight, and the results are averaged.

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: pd.Series or array or list, predicted values, as returned by a regression.
        normalizer: The normalizer values with same shape as y_pred.
        sample_weight: list or array of sample weight.
    Returns:
        regression loss values.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    sample_weight = _sample_weight(actual, sample_weight)
    relative_error = np.abs(actual - predicted) / np.array(normalizer)
    return (relative_error * sample_weight).mean()
Exemplo n.º 28
0
def mean_absolute_percentage_error(y_true, y_pred, sample_weight=None):
    """Mean absolute percentage error regression loss.

    Each absolute error is divided by |y_true|; where y_true is exactly zero
    the divisor is replaced by 1 so the term stays finite.

    Args:
        y_true: pd.Series or array or list, ground truth (correct) labels.
        y_pred: pd.Series or array or list, predicted values, as returned by a regression.
        sample_weight: list or array of sample weight.
    Returns:
        regression loss values.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    sample_weight = _sample_weight(actual, sample_weight)
    abs_error = np.abs(predicted - actual)
    divisor = np.where(actual == 0, 1, np.abs(actual))
    return (abs_error / divisor * sample_weight).mean()
Exemplo n.º 29
0
def chisquare(x, y, sample_weight=None):
    """chi square distance.

    Sum over all positions of ((x - y) * weight)^2 / y, after both inputs are
    cast to int32. Positions where y is zero are divided by 1 instead.

    Args:
        x: pd.Series or array or list, sample n dim feature value.
        y: pd.Series or array or list, sample n dim feature value.
        sample_weight: list or array of sample weight.
    Returns:
        chi square distance value.
    """
    x = np.asarray(x, np.int32)
    y = np.asarray(y, np.int32)
    sample_weight = _sample_weight(x, sample_weight)
    squared_gap = np.square((x - y) * sample_weight)
    # Build the divisor as a new array: `np.asarray` can return the caller's
    # own int32 array, and patching zeros in place would silently modify it.
    divisor = np.where(y == 0, 1, y)
    return np.sum(squared_gap / divisor)
Exemplo n.º 30
0
def euclidean(x, y, normalize=False, sample_weight=None):
    """euclidean distance.

    Square root of the sum of squared weighted differences. When `normalize`
    is set, each difference is first divided by the standard deviation of the
    two values at that position.

    Args:
        x: pd.Series or array or list, sample n dim feature value.
        y: pd.Series or array or list, sample n dim feature value.
        normalize: default False, std=pd.concat([x, y], axis=1).std(axis=1) if normalize else 1.
        sample_weight: list or array of sample weight.
    Returns:
        euclidean distance value.
    """
    x = pd.Series(x)
    y = pd.Series(y)
    sample_weight = _sample_weight(x, sample_weight)
    if normalize:
        std = pd.concat([x, y], axis=1).std(axis=1)
    else:
        std = 1
    scaled_gap = (x - y) * sample_weight / std
    return np.sqrt(np.sum(np.square(scaled_gap)))