Exemplo n.º 1
0
def squared_distance(v1, v2):
    """
    Return the squared Euclidean distance between two vectors.

    The value is the sum of the squared element-wise differences, i.e. the
    distance before a square root is taken.

    Args:
        v1, v2 (List): The two vectors to compare.

    Returns:
        A scalar: the sum of squares of the element-wise difference of
        ``v1`` and ``v2``.

    Raises:
        IndexError: If either argument is rejected by ``valid.is_vector``.

    Examples:
        >>> x = [1, 2, 3]
        >>> y = [3, 2, 1]
        >>> squared_distance(x, y)
        8

        >>> e = [1,2,[1,2]]
        >>> squared_distance(x, e)
        Traceback (most recent call last):
            ...
        IndexError: One of the vectors passed is not a valid vector
    """
    both_valid = valid.is_vector(v1) and valid.is_vector(v2)
    if both_valid:
        difference = vector_subtract(v1, v2)
        return sum_of_squares(difference)

    raise IndexError("One of the vectors passed is not a valid vector")
Exemplo n.º 2
0
def distance(v1, v2):
    """
    Return the distance between two vectors.

    The distance is the magnitude of the element-wise difference of the two
    vectors.

    Args:
        v1, v2 (List): The two vectors to compare.

    Returns:
        A scalar: the magnitude of the element-wise difference of ``v1``
        and ``v2``.

    Raises:
        IndexError: If either argument is rejected by ``valid.is_vector``.

    Examples:
        >>> x = [1, 2, 3]
        >>> y = [3, 2, 1]
        >>> distance(x, y)
        2.8284271247461903

        >>> e = [1, 2, [1, 2]]
        >>> distance(x, e)
        Traceback (most recent call last):
            ...
        IndexError: One of the vectors passed is not a valid vector
    """
    both_valid = valid.is_vector(v1) and valid.is_vector(v2)
    if both_valid:
        difference = vector_subtract(v1, v2)
        return magnitude(difference)

    raise IndexError("One of the vectors passed is not a valid vector")
Exemplo n.º 3
0
def mode(vector):
    """
    Return the most frequent value(s) of a vector.

    Every value whose count equals the highest count is returned, so the
    result has more than one element when there is a tie.

    Args:
        vector (List): A list vector for which a mode is sought.

    Returns:
        A List containing one or more values which are the most frequent.
        If an empty vector passes validation, an empty list is returned.

    Raises:
        IndexError: If ``vector`` is rejected by ``valid.is_vector``.

    Examples:
        >>> x = [1, 2, 3]
        >>> mode(x)
        [1, 2, 3]

        >>> y = [1, 2, 3, 2]
        >>> mode(y)
        [2]

        >>> z = [1, 2, [3, 4]]
        >>> mode(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")

    counts = Counter(vector)
    # default=0 keeps max() from raising ValueError when there is nothing to
    # count; the comprehension below then produces an empty list.
    max_count = max(counts.values(), default=0)

    return [x_i for x_i, count in counts.items() if count == max_count]
Exemplo n.º 4
0
def mean(vector):
    """
    Return the simple arithmetic mean of a vector.

    Args:
        vector (List): A list vector containing the values to calculate
        against.

    Returns:
        A scalar Float value representing the mean of the vector
        (``sum(vector) / len(vector)``).

    Raises:
        IndexError: If ``vector`` is rejected by ``valid.is_vector``.
        ZeroDivisionError: If an empty vector passes validation, because
            the sum is divided by a length of zero.

    Examples:
        >>> x = [2,2,2]
        >>> mean(x)
        2.0

        >>> y = [1,2,3]
        >>> mean(y)
        2.0

        >>> z = [1, 2, 3, [1, 2]]
        >>> mean(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")

    return sum(vector) / len(vector)
Exemplo n.º 5
0
def correlation(v_1, v_2):
    """
    Calculate the correlation between two vectors.

    The correlation is the covariance of the two vectors divided by both of
    their standard deviations. When either standard deviation is not
    positive, the correlation is reported as 0.0.

    Args:
        v_1, v_2 (List): List vectors of the same length.

    Returns:
        A scalar Float value representing the correlation.

    Raises:
        IndexError: If the vectors differ in length, or if either one is
            rejected by ``valid.is_vector``.
        TypeError: If either vector contains a ``str`` element.

    Examples:
        >>> x = [1, 2, 3]
        >>> y = [3, 2, 1]
        >>> correlation(x, y)
        -1.0

        >>> correlation([1, 1, 1], x)
        0.0

        >>> z = [1, 2]
        >>> correlation(x, z)
        Traceback (most recent call last):
            ...
        IndexError: The two vectors must be the same length.

        >>> zz = ['a', 'b', 'c']
        >>> correlation(x, zz) # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        TypeError: The vector passed must contains either Int or Float values.

        >>> zzz = [1, 2, [1, 2]]
        >>> correlation(x, zzz)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if len(v_1) != len(v_2):
        raise IndexError("The two vectors must be the same length.")

    if not (valid.is_vector(v_1) and valid.is_vector(v_2)):
        raise IndexError("The vector passed is not a valid vector")

    # Scan both vectors in place instead of building a merged copy.
    if any(isinstance(i, str) for v in (v_1, v_2) for i in v):
        message = ("The vector passed must contains either " +
                   "Int or Float values.")
        raise TypeError(message)

    sd_v_1 = standard_deviation(v_1)
    sd_v_2 = standard_deviation(v_2)

    if sd_v_1 > 0 and sd_v_2 > 0:
        return covariance(v_1, v_2) / sd_v_1 / sd_v_2

    # No variation in at least one vector: return a float so the result type
    # matches the branch above.
    return 0.0
Exemplo n.º 6
0
def covariance(v_1, v_2):
    """
    Calculate the covariance of two vectors.

    The covariance is the dot product of the two de-meaned vectors divided
    by ``n - 1``, where ``n`` is the common length of the vectors.

    Args:
        v_1, v_2 (List): List vectors of the same length, containing at
        least two values each.

    Returns:
        A scalar value of the covariance.

    Raises:
        IndexError: If the vectors differ in length, if either one is
            rejected by ``valid.is_vector``, or if they contain fewer than
            two values.
        TypeError: If either vector contains a ``str`` element.

    Examples:
        >>> x = [1, 2, 3]
        >>> y = [3, 2, 1]
        >>> covariance(x, y)
        -1.0

        >>> z = [1, 2]
        >>> covariance(x, z)
        Traceback (most recent call last):
            ...
        IndexError: The two vectors must be the same length.

        >>> zz = ['a', 'b', 'c']
        >>> covariance(x, zz) # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        TypeError: The vector passed must contains either Int or Float values.

        >>> zzz = [1, 2, [1, 2]]
        >>> covariance(x, zzz)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector

        >>> covariance([1], [2])
        Traceback (most recent call last):
            ...
        IndexError: The vectors must contain at least two values
    """
    if len(v_1) != len(v_2):
        raise IndexError("The two vectors must be the same length.")

    if not (valid.is_vector(v_1) and valid.is_vector(v_2)):
        raise IndexError("The vector passed is not a valid vector")

    # Scan both vectors in place instead of building a merged copy.
    if any(isinstance(i, str) for v in (v_1, v_2) for i in v):
        message = ("The vector passed must contains either " +
                   "Int or Float values.")
        raise TypeError(message)

    num_elements = len(v_1)

    # The n - 1 denominator needs at least two observations; fail with an
    # explicit error rather than a ZeroDivisionError.
    if num_elements < 2:
        raise IndexError("The vectors must contain at least two values")

    return dot(de_mean(v_1), de_mean(v_2)) / (num_elements - 1)
Exemplo n.º 7
0
def variance(vector):
    """
    Calculate the variance of a vector with length >= 2.

    The variance is the sum of squared deviations from the mean divided by
    ``n - 1``, where ``n`` is the length of the vector.

    Args:
        vector (List): A vector containing 2 or more Int or Float values.

    Returns:
        A scalar representing the variance.

    Raises:
        IndexError: If ``vector`` is rejected by ``valid.is_vector`` or
            contains fewer than two values.
        TypeError: If the vector contains a ``str`` element.

    Examples:
        >>> x = [1, 2, 3]
        >>> variance(x)
        1.0

        >>> y = ['a', 'b', 'c']
        >>> variance(y) # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        TypeError: The vector passed must contains either Int or Float values.

        >>> z = [1, 2, [1, 2]]
        >>> variance(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector

        >>> zz = [1]
        >>> variance(zz)
        Traceback (most recent call last):
            ...
        IndexError: The vector must contain at least two values
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")

    if any(isinstance(i, str) for i in vector):
        message = ("The vector passed must contains either " +
                   "Int or Float values.")
        raise TypeError(message)

    num_elements = len(vector)

    # The n - 1 denominator needs at least two observations.
    if num_elements < 2:
        raise IndexError("The vector must contain at least two values")

    deviations = de_mean(vector)

    return sum_of_squares(deviations) / (num_elements - 1)
Exemplo n.º 8
0
def scalar_multiply(v, sc):
    """
    Scale every element of a vector by a scalar.

    Args:
        v (List): List representing the vector.
        sc (int or float): Scalar to multiply by.

    Returns:
        A new vector of the same length as ``v``, where each element has
        been multiplied by ``sc``.

    Raises:
        IndexError: If ``v`` is rejected by ``valid.is_vector``.

    Examples:
        >>> scalar_multiply([1, 2, 3], 1)
        [1, 2, 3]

        >>> scalar_multiply([1, 2, 3], 2)
        [2, 4, 6]

        >>> scalar_multiply([1, 2, [1, 2]], 1)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if valid.is_vector(v):
        scaled = []
        for element in v:
            scaled.append(element * sc)
        return scaled

    raise IndexError("The vector passed is not a valid vector")
Exemplo n.º 9
0
def sum_of_squares(v):
    """
    Return the sum of the squared elements of a vector.

    This is the dot product of the vector with itself.

    Args:
        v (List): The vector for which the sum of squares should be calculated.

    Returns:
        A scalar of the sum of squares of the vector.

    Raises:
        IndexError: If ``v`` is rejected by ``valid.is_vector``.

    Examples:
        >>> x = [1, 2, 3]
        >>> sum_of_squares(x)
        14

        >>> e = [1, 2, [1, 2]]
        >>> sum_of_squares(e)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if valid.is_vector(v):
        return dot(v, v)

    raise IndexError("The vector passed is not a valid vector")
Exemplo n.º 10
0
def magnitude(v):
    """
    Return the magnitude (length) of a vector.

    The magnitude is the square root of the vector's sum of squares.

    Args:
        v (List): The vector for which the magnitude should be calculated.

    Returns:
        A scalar of the magnitude of the vector.

    Raises:
        IndexError: If ``v`` is rejected by ``valid.is_vector``.

    Examples:
        >>> x = [1, 2, 3]
        >>> magnitude(x)
        3.7416573867739413

        >>> e = [1, 2, [1, 2]]
        >>> magnitude(e)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if valid.is_vector(v):
        squared_length = sum_of_squares(v)
        return math.sqrt(squared_length)

    raise IndexError("The vector passed is not a valid vector")
Exemplo n.º 11
0
def quantile(vector, percentile):
    """
    Return the desired percentile value of a vector.

    The vector is sorted and the element at index
    ``int(percentile * len(vector))`` is returned; no interpolation between
    neighbouring values is performed.

    Args:
        vector (List): A list vector containing the values to calculate
        against.

        percentile (Float): A value of the sought percentile such that
        (0.0 < value < 1.0)

    Returns:
        A scalar value from the target vector, which represents the value less
        than which a certain percentile of the data lies.

    Raises:
        ValueError: If ``percentile`` is not strictly between 0.0 and 1.0.
        IndexError: If ``vector`` is rejected by ``valid.is_vector``.

    Examples:
        >>> x = [*range(10)]
        >>> quantile(x, 0.30)
        3

        >>> quantile(x, 0.90)
        9

        >>> quantile(x, 1.0) # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        ValueError: The percentile must be a Float such that
        (0.0 < percentile < 1.0)

        >>> z = [1, 2, 3, [1, 2, 3]]
        >>> quantile(z, 0.50)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if not 0.0 < percentile < 1.0:
        message = ('The percentile must be a Float such that ' +
                   '(0.0 < percentile < 1.0)')
        raise ValueError(message)

    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")

    # percentile < 1.0 guarantees the index is below len(vector) for any
    # non-empty vector.
    p_index = int(percentile * len(vector))

    return sorted(vector)[p_index]
Exemplo n.º 12
0
def standard_deviation(vector):
    """
    Compute the standard deviation of a vector.

    The standard deviation is the square root of the vector's variance.

    Args:
        vector (List): A vector containing 2 or more Int or Float values.

    Returns:
        A scalar value representing the standard deviation.

    Raises:
        IndexError: If ``vector`` is rejected by ``valid.is_vector`` or
            contains fewer than two values.
        TypeError: If the vector contains a ``str`` element.

    Examples:
        >>> x = [1, 2, 3]
        >>> standard_deviation(x)
        1.0

        >>> y = ['a', 'b', 'c']
        >>> standard_deviation(y) # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        TypeError: The vector passed must contains either Int or Float values.

        >>> z = [1, 2, [1, 2]]
        >>> standard_deviation(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector

        >>> zz = [1]
        >>> standard_deviation(zz)
        Traceback (most recent call last):
            ...
        IndexError: The vector must contain at least two values
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")

    if any(isinstance(i, str) for i in vector):
        message = ("The vector passed must contains either " +
                   "Int or Float values.")
        raise TypeError(message)

    if len(vector) < 2:
        raise IndexError("The vector must contain at least two values")

    return math.sqrt(variance(vector))
Exemplo n.º 13
0
def median(vector):
    """
    Return the median (central value) of a vector.

    For an odd number of values the middle value of the sorted vector is
    returned; for an even number, the mean of the two middle values.

    Args:
        vector (List): A list vector containing the values to calculate
        against.

    Returns:
        A scalar Float value representing the median of the vector.

    Raises:
        IndexError: If ``vector`` is rejected by ``valid.is_vector``.

    Examples:
        >>> w = [2, 2, 2]
        >>> median(w)
        2.0

        >>> x = [1, 2, 5]
        >>> median(x)
        2.0

        >>> y = [1, 2, 3, 4]
        >>> median(y)
        2.5

        >>> z = [1, 2, 3, [1, 2]]
        >>> median(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")

    elements = len(vector)
    sorted_vector = sorted(vector)
    midpoint = elements // 2

    if elements % 2 == 1:
        # float() keeps the return type consistent with the even-length case.
        return float(sorted_vector[midpoint])

    # Even length: average the two values on either side of the midpoint.
    low = midpoint - 1
    high = midpoint
    return mean([sorted_vector[low], sorted_vector[high]])
Exemplo n.º 14
0
def vector_subtract(v1, v2):
    """
    Subtract two equal-length vectors (lists) element by element.

    Args:
        v1, v2 (List): Lists of the same length representing the vectors.

    Returns:
        A new vector of the same length as v1 and v2, whose i-th element is
        ``v1[i] - v2[i]``.

    Raises:
        IndexError: If either argument is rejected by ``valid.is_vector``,
            or if the two vectors differ in length.

    Examples:
        >>> w = [3, 2, 1]
        >>> x = [1, 2, 3]
        >>> vector_subtract(w, x)
        [2, 0, -2]

        >>> y = [1, 2, 3]
        >>> vector_subtract(x, y)
        [0, 0, 0]

        >>> z = [1, 3]
        >>> vector_subtract(x, z)
        Traceback (most recent call last):
            ...
        IndexError: Vectors must be the same length

        >>> e = [1,2,[1,2]]
        >>> vector_subtract(x, e)
        Traceback (most recent call last):
            ...
        IndexError: One of the vectors passed is not a valid vector
    """
    if not valid.is_vector(v1) or not valid.is_vector(v2):
        raise IndexError("One of the vectors passed is not a valid vector")

    if not len(v1) == len(v2):
        raise IndexError('Vectors must be the same length')

    differences = []
    for left, right in zip(v1, v2):
        differences.append(left - right)
    return differences
Exemplo n.º 15
0
def data_range(vector):
    """
    Return max(vector) - min(vector).

    Only works for vectors containing Int and Float values. Errors on
    character vectors.

    Args:
        vector (List): A vector to calculate against.

    Returns:
        A scalar value of the same type as the vector.

    Raises:
        IndexError: If ``vector`` is rejected by ``valid.is_vector``.
        TypeError: If the vector contains a ``str`` element.

    Examples:
        >>> w = [1, 2, 3]
        >>> data_range(w)
        2

        >>> x = [1.0, 2.0, 3.0]
        >>> data_range(x)
        2.0

        >>> y = ['a', 'b', 'c']
        >>> data_range(y) # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        TypeError: The vector passed must contains either Int or Float values.

        >>> z = [1, 2, [1, 2]]
        >>> data_range(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")

    if any(isinstance(i, str) for i in vector):
        message = ("The vector passed must contains either " +
                   "Int or Float values.")
        raise TypeError(message)

    return max(vector) - min(vector)
Exemplo n.º 16
0
def de_mean(vector):
    """
    Translate the vector by subtracting its mean, so the result has mean=0.

    Args:
        vector (List): A valid vector of Int or Float values.

    Returns:
        A vector (List), centered on 0.

    Raises:
        IndexError: If ``vector`` is rejected by ``valid.is_vector``.
        TypeError: If the vector contains a ``str`` element.

    Examples:
        >>> x = [1, 2, 3]
        >>> de_mean(x)
        [-1.0, 0.0, 1.0]

        >>> y = ['a', 'b', 'c']
        >>> de_mean(y) # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        TypeError: The vector passed must contains either Int or Float values.

        >>> z = [1, 2, [1, 2]]
        >>> de_mean(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")

    if any(isinstance(i, str) for i in vector):
        message = ("The vector passed must contains either " +
                   "Int or Float values.")
        raise TypeError(message)

    x_bar = mean(vector)

    return [x_i - x_bar for x_i in vector]
Exemplo n.º 17
0
def dot(v1, v2):
    """
    Return the dot product of two equal-length vectors.

    The dot product is the sum of the products of corresponding elements.

    Args:
        v1, v2 (List): The vectors for which a dot product will be computed.
            Must be the same length.

    Returns:
        A scalar of the dot product of the two vectors.

    Raises:
        IndexError: If either argument is rejected by ``valid.is_vector``,
            or if the two vectors differ in length.

    Examples:
        >>> w = [1, 2, 3]
        >>> x = [1, 2, 3]
        >>> dot(w, x)
        14

        >>> z = [1, 2]
        >>> dot(x, z)
        Traceback (most recent call last):
            ...
        IndexError: Vectors must be the same length

        >>> e = [1, 2, [1, 2]]
        >>> dot(x, e)
        Traceback (most recent call last):
            ...
        IndexError: One of the vectors passed is not a valid vector
    """
    if not valid.is_vector(v1) or not valid.is_vector(v2):
        raise IndexError("One of the vectors passed is not a valid vector")

    if len(v1) != len(v2):
        raise IndexError('Vectors must be the same length')

    products = (left * right for left, right in zip(v1, v2))
    return sum(products)
Exemplo n.º 18
0
def interquartile_range(vector):
    """
    Calculate the difference between the 75th and 25th percentile.

    Both percentiles are obtained from ``quantile``.

    Args:
        vector (List): A numeric vector.

    Returns:
        A scalar value representing the interquartile range.

    Raises:
        IndexError: If ``vector`` is rejected by ``valid.is_vector``.
        TypeError: If the vector contains a ``str`` element.

    Examples:
        >>> x = [*range(10)]
        >>> interquartile_range(x)
        5

        >>> y = ['a', 'b', 'c']
        >>> interquartile_range(y) # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        TypeError: The vector passed must contains either Int or Float values.

        >>> z = [1, 2, [1, 2]]
        >>> interquartile_range(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")

    if any(isinstance(i, str) for i in vector):
        message = ("The vector passed must contains either " +
                   "Int or Float values.")
        raise TypeError(message)

    return quantile(vector, 0.75) - quantile(vector, 0.25)
Exemplo n.º 19
0
def vector_sum(vectors):
    """
    Add up several equal-length vectors (lists) element by element.

    The vectors are combined pairwise with ``vector_add``.

    Args:
        vectors (List): Lists of the same length representing the vectors.

    Returns:
        A new vector of the same length as all of the vectors, where the
        corresponding elements have been summed.

    Raises:
        IndexError: If any vector is rejected by ``valid.is_vector``, or if
            the vectors do not all share the length of the first one.

    Examples:
        >>> w = [1, 2, 3]
        >>> x = [1, 2, 3]
        >>> y = [1, 2, 3]
        >>> myVecs = [w, x, y]
        >>> vector_sum(myVecs)
        [3, 6, 9]

        >>> z = [1, 3]
        >>> myVecs = [w, x, y, z]
        >>> vector_sum(myVecs)
        Traceback (most recent call last):
            ...
        IndexError: Vectors must be the same length

        >>> e = [1, 2, [1, 2]]
        >>> vector_sum([w, x, y, e])
        Traceback (most recent call last):
            ...
        IndexError: One of the vectors passed is not a valid vector
    """
    for candidate in vectors:
        if not valid.is_vector(candidate):
            raise IndexError("One of the vectors passed is not a valid vector")

    # Every remaining vector is measured against the first one.
    expected_length = len(vectors[0])
    for other in vectors[1:]:
        if len(other) != expected_length:
            raise IndexError('Vectors must be the same length')

    return reduce(vector_add, vectors)
Exemplo n.º 20
0
def vector_mean(vectors):
    """
    Return the element-wise mean of several equal-length vectors.

    The i-th element of the result is the mean of the i-th elements of the
    input vectors, computed as the element-wise sum scaled by ``1/n``,
    where ``n`` is the number of vectors.

    Args:
        vectors (List): List containing all of the vectors to be computed
        against.

    Returns:
        A new vector of the same length as all of the vectors, where the
        corresponding elements have had a mean calculated on them.

    Raises:
        IndexError: If any vector is rejected by ``valid.is_vector``.

    Examples:
        >>> myVecs = [ [1, 2, 3], [1, 2, 3], [1, 2, 3] ]
        >>> vector_mean(myVecs)
        [1.0, 2.0, 3.0]

        >>> myVecs = [ [1, 2, 3], [2, 2, 2], [3, 2, 1] ]
        >>> vector_mean(myVecs)
        [2.0, 2.0, 2.0]

        >>> myVecs = [ [1, 2, 3], [2, 2, 2], [2, 1] ]
        >>> vector_mean(myVecs)
        Traceback (most recent call last):
            ...
        IndexError: Vectors must be the same length

        >>> myVecs = [ [1, 2, 3], [2, 2, 2], [2, 1, [1, 2] ] ]
        >>> vector_mean(myVecs)
        Traceback (most recent call last):
            ...
        IndexError: One of the vectors passed is not a valid vector
    """
    for candidate in vectors:
        if not valid.is_vector(candidate):
            raise IndexError("One of the vectors passed is not a valid vector")

    vector_count = len(vectors)
    # Sum first, then scale: the sum is evaluated before the 1/n factor.
    totals = vector_sum(vectors)
    return scalar_multiply(totals, 1 / vector_count)