def __normalize(nativeElements, foreignElements):
    """Rescale every native characteristic value in place.

    The per-characteristic minimum and maximum are taken from
    ``__min_max``; each value of every learning and test element is then
    replaced by ``__norm(value, minimum, maximum)``.

    Args:
        nativeElements: Iterable of classes exposing ``learning_set`` and
            ``test_set``; every element in those sets carries an indexable,
            mutable ``characteristicsValues``.
        foreignElements: Not used by the part of the function shown here.
    """
    logger.log_header("Normalization")

    MIN_INDEX = 0
    MAX_INDEX = 1

    # One [min, max] pair per characteristic, computed on the native data
    bounds = __min_max(nativeElements, MIN_INDEX, MAX_INDEX)

    def rescale(elements):
        # Overwrite each characteristic with its normalized counterpart
        for element in elements:
            for position in range(len(element.characteristicsValues)):
                raw_value = element.characteristicsValues[position]
                lowest = bounds[position][MIN_INDEX]
                highest = bounds[position][MAX_INDEX]
                element.characteristicsValues[position] = __norm(
                    raw_value, lowest, highest)

    for nativeClass in nativeElements:
        # Learning elements first, then test elements
        rescale(nativeClass.learning_set)
        rescale(nativeClass.test_set)

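    # Find MIN / MAX For foreign elements
    # NOTE(review): a triple-quoted block opened here is cut off in this
    # excerpt; the remainder of the function is not visible.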
def __min_max(nativeElements, MIN_INDEX=0, MAX_INDEX=1):
    """Return the per-characteristic minimum and maximum of the native data.

    Both the learning set and the test set of every class are scanned.

    Args:
        nativeElements: Sequence of classes exposing ``learning_set`` and
            ``test_set``. The first learning element of the first class
            fixes the expected number of characteristics.
        MIN_INDEX: Position of the minimum inside each result pair.
        MAX_INDEX: Position of the maximum inside each result pair.

    Returns:
        A list holding one two-item list per characteristic, with the
        smallest seen value at ``MIN_INDEX`` and the largest at
        ``MAX_INDEX``.

    Raises:
        SystemExit: If an element's number of characteristics differs from
            the expected dimension (the problem is logged first).
    """
    # Get the dimension
    dim = len(nativeElements[0].learning_set[0].characteristicsValues)

    # Seed with +/- infinity instead of fixed sentinels such as 99999:
    # fixed sentinels silently produce wrong bounds for data beyond them.
    min_max_values = [[0] * 2 for _ in range(dim)]
    for bounds in min_max_values:
        bounds[MIN_INDEX] = float("inf")
        bounds[MAX_INDEX] = float("-inf")

    logger.log_header("Normalizing Native",
                      styles=[logger.LogHeaderStyle.SUB_HEADER])

    def scan(elements):
        # Widen the running bounds with every element of one set
        for element in elements:
            values = element.characteristicsValues

            # Dimensions must fit
            if len(values) != dim:
                logger.log("Incorrect dimensions. Exiting...")
                sys.exit()

            for i in range(dim):
                charValue = values[i]
                if min_max_values[i][MIN_INDEX] >= charValue:
                    min_max_values[i][MIN_INDEX] = charValue
                if min_max_values[i][MAX_INDEX] <= charValue:
                    min_max_values[i][MAX_INDEX] = charValue

    for nativeClass in nativeElements:
        scan(nativeClass.learning_set)
        scan(nativeClass.test_set)

    return min_max_values
    def __init__(self,
                 centroid,
                 points,
                 name,
                 number,
                 give_info=True,
                 do_ellipsoid=True,
                 do_cuboid=True):
        """Store the cluster data and optionally build its bounding shapes.

        Args:
            centroid: Stored as ``self.center``.
            points: Stored as ``self.points`` and handed to the shapes.
            name: Used in the log header and passed to ``self.__info``.
            number: Used in the log header and passed to ``self.__info``.
            give_info: When true, log a creation header and, if an
                ellipsoid is built, the cluster info and the share of
                points inside the ellipsoid.
            do_ellipsoid: When true, build ``self.ellipsoid`` and store the
                values returned by its ``is_point_in_ellipsoid`` as
                ``self.rejected_x`` / ``self.rejected_y`` (and
                ``self.rejected_z`` when ``global_v.CHAR_NUM`` is 3).
            do_cuboid: When true, build ``self.cuboid``.
        """
        if give_info:
            header_text = "Created Cluster: {} Number: #{}".format(
                str([name]), str(number))
            logger.log_header(header_text,
                              styles=[logger.LogHeaderStyle.SUB_HEADER])

        self.center = centroid
        self.points = points

        if do_cuboid:
            logger.log("Creating Cuboid in Cluster")
            self.cuboid = Cuboid(self.points)

        if do_ellipsoid:
            logger.log("Creating Ellipsoid in Cluster")
            self.ellipsoid = Ellipsoid(self.points, global_v.SEMI_AXIS_SCALE)

            # Three coordinate lists come back in 3-D, two otherwise
            three_dimensional = global_v.CHAR_NUM == 3
            rejected = self.ellipsoid.is_point_in_ellipsoid(self.points[:])
            if three_dimensional:
                self.rejected_x, self.rejected_y, self.rejected_z = rejected
            else:
                self.rejected_x, self.rejected_y = rejected

            if give_info:
                self.__info(name, number)
                inside_share = 1 - len(self.rejected_x) / len(self.points)
                logger.log('Points in ellipsoid: ' +
                           str(inside_share * 100) + '%')
# Example #4
# 0
def run():
    """Run the cluster evaluation on a native or a generated training set.

    When ``global_v.NATIVE_TRAINING_FILE`` is truthy the learning set of the
    deserialized native data is evaluated; otherwise the learning set of
    the first class returned by ``__generate_symbol`` is used.
    """
    if global_v.NATIVE_TRAINING_FILE:
        logger.log_header("Cluster Evaluation: " +
                          str(global_v.NATIVE_TRAINING_FILE))
        training_set = loader.deserialize_native().learning_set
    else:
        logger.log_header("Cluster Evaluation, k clouds: " +
                          str(global_v.K_CLOUD_DISTORTION))
        training_set = __generate_symbol()[0].learning_set

    __compute_cluster_evaluation(training_set)
def __rat_l_evaluation(training_set, start_k, end_k):
    """Log every result of ``rat_l.compute`` to the cluster log file.

    Args:
        training_set: Forwarded unchanged to ``rat_l.compute``.
        start_k: Forwarded to ``rat_l.compute``; also the k value reported
            for the first result.
        end_k: Forwarded unchanged to ``rat_l.compute``.
    """
    cluster_log = logger.LOG_CLUSTER_FILE_NAME
    logger.log_header("Ratkowsky-Lance",
                      filename=cluster_log,
                      styles=[logger.LogHeaderStyle.SUB_HEADER])

    scores = rat_l.compute(training_set, start_k, end_k)

    # The result at position p is reported for k = start_k + p
    for position in range(len(scores)):
        line = "rat_l({!s}) = {!s}".format(position + start_k,
                                            scores[position])
        logger.log(line,
                   filename=logger.LOG_CLUSTER_FILE_NAME,
                   styles=[logger.LogStyle.NONE])
def __pbm_evaluation(training_set, start_k, end_k):
    """Log every result of ``pbm.compute`` to the cluster log file.

    Args:
        training_set: Forwarded unchanged to ``pbm.compute``.
        start_k: Forwarded to ``pbm.compute``; also the k value reported
            for the first result.
        end_k: Forwarded unchanged to ``pbm.compute``.
    """
    cluster_log = logger.LOG_CLUSTER_FILE_NAME
    logger.log_header("PBM",
                      filename=cluster_log,
                      styles=[logger.LogHeaderStyle.SUB_HEADER])

    scores = pbm.compute(training_set, start_k, end_k)

    # The result at position p is reported for k = start_k + p
    for position in range(len(scores)):
        line = "pbm({!s}) = {!s}".format(position + start_k,
                                         scores[position])
        logger.log(line,
                   filename=logger.LOG_CLUSTER_FILE_NAME,
                   styles=[logger.LogStyle.NONE])
def __compute_clusters(nativeElements):
    """Cluster ``nativeElements`` while showing a one-step progress bar.

    Args:
        nativeElements: Wrapped in a single-item list and handed to
            ``Clusterer().computeClusters``.
    """
    logger.log_header("Clustering K = {!s}".format(global_v.K))

    # Progress bar spans the whole clustering call
    p_bar.init(1, "Clustering")

    # Legacy function, requires a list as input
    Clusterer().computeClusters([nativeElements])

    p_bar.finish()
def __load_symbols():
    """Load the native and foreign symbols and update the global counters.

    Sets ``global_v.CLASS_NUM`` to 1 and ``global_v.CHAR_NUM`` to the number
    of characteristics of the first native learning element.

    Returns:
        A ``(nativeElements, foreignElements)`` tuple.
    """
    logger.log_header("Loading Native symbols")
    native = loader.deserialize_native()

    logger.log_header("Loading Foreign symbols")
    foreign = loader.load_foreign_xls()

    global_v.CLASS_NUM = 1
    first_learning_element = native.learning_set[0]
    global_v.CHAR_NUM = len(first_learning_element.characteristicsValues)

    return native, foreign
def __deserialize():
    """Write each native learning element's values to ``training_.txt``.

    The native data is deserialized through ``loader`` and every learning
    element's ``characteristicsValues`` is logged as one line with the
    surrounding brackets of its string form removed.
    """
    logger.log_header("Deserializing")

    native = loader.deserialize_native()

    for item in native.learning_set:
        # Drop the enclosing brackets of the string representation
        line = str(item.characteristicsValues).strip("[").rstrip("]")
        logger.log(line,
                   filename="training_.txt",
                   styles=[logger.LogStyle.NONE, logger.LogStyle.FILE_ONLY],
                   text_indent="")
# Example #10
# 0
def serialize_chosen_elements(nativeElements):
    """Merge the chosen native classes and write their elements to files.

    A class is chosen when its name is listed in ``global_v.NATIVE_CLASSES``
    or when that collection is empty. The learning and test sets of all
    chosen classes are appended to one new ``SymbolClass``, whose elements
    are logged to ``training_[names].txt`` and ``test_[names].txt``.

    Args:
        nativeElements: Iterable of classes exposing ``name``,
            ``learning_set`` and ``test_set``.

    Returns:
        The merged ``SymbolClass``; the chosen class names, joined with
        ", ", are appended to its ``name``.
    """
    logger.log_header("Choosing Native elements")
    chosenNativeElements = SymbolClass("", ColorChooser().get_color())

    # Go through all symbol classes and collect the ones we want
    chosen_names = []
    for symbol_class in nativeElements:
        if (symbol_class.name in global_v.NATIVE_CLASSES
                or len(global_v.NATIVE_CLASSES) == 0):
            chosenNativeElements.learning_set += symbol_class.learning_set
            chosenNativeElements.test_set += symbol_class.test_set
            chosen_names.append(str(symbol_class.name))

    # Join instead of appending ", " and calling rstrip(", "): rstrip removes
    # a character set, so it would also eat a final name ending in "," or " ".
    joined_names = ", ".join(chosen_names)
    chosenNativeElements.name += joined_names
    m_filename = "[" + joined_names + "]"

    # NOTE(review): the two sets strip brackets slightly differently; the
    # results agree for flat lists, so both forms are kept as they were.
    for learning_element in chosenNativeElements.learning_set:
        # strip("[]") already removes every trailing "]"
        element_str = str(learning_element.characteristicsValues).strip("[]")

        logger.log(element_str,
                   filename="training" + "_" + m_filename + ".txt",
                   styles=[logger.LogStyle.NONE, logger.LogStyle.FILE_ONLY],
                   text_indent="")

    for test_element in chosenNativeElements.test_set:
        element_str = str(test_element.characteristicsValues)
        element_str = element_str.strip("[").rstrip("]")

        logger.log(element_str,
                   filename="test" + "_" + m_filename + ".txt",
                   styles=[logger.LogStyle.NONE, logger.LogStyle.FILE_ONLY],
                   text_indent="")

    # Log
    logger.log(str(chosenNativeElements))

    return chosenNativeElements
def __print_results(accuracy, sensitivity, precision, f_measure, TP, FN, TN,
                    FP, classify_geometry):
    """Log the classifier counts and measurements to the results file.

    Args:
        accuracy: Logged rounded to two decimals.
        sensitivity: Logged rounded to two decimals.
        precision: Logged rounded to two decimals.
        f_measure: Logged rounded to two decimals.
        TP: Logged rounded to two decimals.
        FN: Logged rounded to two decimals.
        TN: Logged rounded to two decimals.
        FP: Logged rounded to two decimals.
        classify_geometry: ``classifier.CLASSIFY_ELLIPSOID`` or
            ``classifier.CLASSIFY_CUBOID``; selects the results file and the
            header text.

    Raises:
        ValueError: If ``classify_geometry`` is neither of the two supported
            values.
    """
    # Choose the file for results
    if classify_geometry == classifier.CLASSIFY_ELLIPSOID:
        filename = logger.LOG_RESULTS_ELLIPSOIDS_FILE_NAME
        header = "Ellipsoids"
    elif classify_geometry == classifier.CLASSIFY_CUBOID:
        filename = logger.LOG_RESULTS_CUBOIDS_FILE_NAME
        header = "Cuboids"
    else:
        # Fail with a clear message instead of an UnboundLocalError on the
        # first use of ``header`` below.
        raise ValueError("Unsupported classify_geometry: " +
                         repr(classify_geometry))

    # The decimal to round to for logging results
    round_decimal = 2

    # Main Header
    logger.log_header("Results: " + header, filename)

    # SubHeader: Classifier Quality
    logger.log_header("Classifier Quality: " + header,
                      filename,
                      styles=[logger.LogHeaderStyle.SUB_HEADER])

    for label, value in (("TP", TP), ("FN", FN), ("TN", TN), ("FP", FP)):
        logger.log(label + ": " + str(round(value, round_decimal)),
                   filename,
                   styles=[logger.LogStyle.NONE])

    # SubHeader: Classifier Measurements
    logger.log_header("Classifier Measurements: " + header,
                      filename,
                      styles=[logger.LogHeaderStyle.SUB_HEADER])

    measurements = (("Accuracy", accuracy), ("Sensitivity", sensitivity),
                    ("Precision", precision), ("F-Measure", f_measure))
    for label, value in measurements:
        logger.log(label + ": " + str(round(value, round_decimal)),
                   filename,
                   styles=[logger.LogStyle.NONE])
def __compute_classifier_quality_ellipsoids(nativeElements, foreignElements):
    """Classify with ellipsoids and log the resulting quality figures.

    TP/FN come from ``classifier.compute_training_vs_testing``, TN/FP from
    ``classifier.compute_native_vs_foreign``; the measurements derived from
    them by ``classifier.compute_measurements`` are passed, together with
    the counts, to ``__print_results``.

    Args:
        nativeElements: Forwarded to both classification calls.
        foreignElements: Forwarded to the native-vs-foreign call.
    """
    logger.log_header("Classification. Ellipsoids")

    # Training vs Test
    logger.log_header("Classification Training vs Testing. Ellipsoids",
                      styles=[logger.LogHeaderStyle.SUB_HEADER])
    true_pos, false_neg = classifier.compute_training_vs_testing(
        nativeElements, classifier.CLASSIFY_ELLIPSOID)

    # Native vs Foreign
    logger.log_header("Classification Native vs Foreign. Ellipsoids",
                      styles=[logger.LogHeaderStyle.SUB_HEADER])
    true_neg, false_pos = classifier.compute_native_vs_foreign(
        nativeElements, foreignElements, classifier.CLASSIFY_ELLIPSOID)

    # Derive the measurements from the four counts
    accuracy, sensitivity, precision, f_measure = (
        classifier.compute_measurements(true_pos, false_neg, true_neg,
                                        false_pos))

    # Report everything to the ellipsoid results file
    __print_results(accuracy, sensitivity, precision, f_measure, true_pos,
                    false_neg, true_neg, false_pos,
                    classifier.CLASSIFY_ELLIPSOID)
def __serialize():
    """Load the native symbols from XLS and serialize the chosen ones."""
    logger.log_header("Serializing")

    # Everything loaded is handed straight to the loader's serializer
    loader.serialize_chosen_elements(loader.load_native_xls())