def chi_square_distance(data, data_instance1, data_instance2):
    """Return the chi-square distance between two data instances.

    This distance metric is used only for categorical data.  Each
    instance's feature vector is turned into a profile (every feature
    divided by the vector's total) and the squared differences between the
    two profiles are weighted by the inverse of the data set's average
    column profile before being summed and square-rooted.

    Args:
        data: The data set; it is handed to ``pick_up_all_data_columns`` to
            obtain its columns.  The first column is skipped because it
            holds the labels.
        data_instance1: Object exposing a numeric ``feature_vector``.
        data_instance2: Object exposing a numeric ``feature_vector`` with
            the same length as ``data_instance1``'s.

    Returns:
        The distance, or ``None`` (after printing a message) when the two
        feature vectors differ in length.

    Raises:
        ZeroDivisionError: With plain Python numbers, when the data set's
            grand total, one of its column totals, or either feature
            vector's total is zero.
    """
    vector1 = data_instance1.feature_vector
    vector2 = data_instance2.feature_vector
    if len(vector1) != len(vector2):
        print("The two points must be defined in the same dimensional space.")
        return None

    # Column totals of the data set, leaving out column 0 (the labels).
    column_totals = [
        sum(column)
        for index, column in enumerate(pick_up_all_data_columns(data))
        if index != 0
    ]
    # Share of the grand total held by each column: the chi-square weights.
    grand_total = sum(column_totals)
    average_profiles = [total / grand_total for total in column_totals]

    # Each instance is normalised by its OWN total so that only the shape
    # of the two profiles is compared, not their magnitude.
    total1 = sum(vector1)
    total2 = sum(vector2)
    profile1 = [feature / total1 for feature in vector1]
    profile2 = [feature / total2 for feature in vector2]

    sum_of_squares = 0.0
    for index, (share1, share2) in enumerate(zip(profile1, profile2)):
        # Indexing (rather than zipping) average_profiles keeps the
        # IndexError when the data set has fewer feature columns than the
        # instances have features.
        sum_of_squares += (1 / average_profiles[index]) * pow(share1 - share2, 2)
    return sqrt(sum_of_squares)
def standardised_euclidean_distance(data, data_instance1, data_instance2):
    """Compute the variance-weighted euclidean distance between two instances.

    Every squared coordinate difference is divided by the square of the
    value ``standard_deviation`` yields for the matching data column, and
    the square root of the accumulated sum is returned.  If the two feature
    vectors differ in length a message is printed and ``None`` is returned.
    """
    first_vector = data_instance1.feature_vector
    second_vector = data_instance2.feature_vector
    if len(first_vector) != len(second_vector):
        print("The two points must be defined in the same dimensional space.")
        return

    # Column 0 always carries the data labels, so it is left out.
    deviations = [
        standard_deviation(series)
        for position, series in enumerate(pick_up_all_data_columns(data))
        if position != 0
    ]

    total = 0.0
    for position, coordinate in enumerate(first_vector):
        # The weight is the inverse variance, i.e. one over the std squared.
        inverse_variance = 1 / pow(deviations[position], 2)
        gap = coordinate - second_vector[position]
        total += inverse_variance * pow(gap, 2)
    return sqrt(total)