示例#1
0
    def __init__(self, query_manager, logCB=None, progressCB=None):
        """Create the reporter and the empty containers used while loading data.

        Args:
            query_manager: Stored on the instance and passed straight to
                ``get_possible_values``.
            logCB: Optional logging callback forwarded to ``PrintOutput``.
            progressCB: Optional progress callback forwarded to ``PrintOutput``.
        """
        # Reporting channel for log / progress output
        self.printOut = PrintOutput(logCB, progressCB, PROFILING)

        self.query_manager = query_manager

        # Profile of the information currently being handled. The slots are
        # reset first and get_possible_values() is called immediately after
        # (presumably it fills them in -- TODO confirm against its definition).
        self.class_result_dict = \
            self.class_att_value_weight = \
            self.numeric_result_dict = None
        self.get_possible_values(query_manager)

        # Lookup table the SVM model uses to piece results back together
        self.label_id_lookup_table = None

        # Data currently held: three independent, initially empty lists
        self.labels, self.samples, self.is_null_list = [], [], []

        # KNN bookkeeping
        self.log_trans_atts = set()
        self.attribute_id_list = list()
        self.attribute_id_dict = dict()
        self.id_attribute_dict = dict()
示例#2
0
    def __init__(self, xml_elem, logCB=None, progressCB=None):
        """Configure an "LDOF" test from its XML element.

        Args:
            xml_elem: DOM element describing the test. Must carry an
                ``attributes`` attribute; may carry ``log_trans_attributes``.
            logCB: Optional logging callback forwarded to ``PrintOutput``.
            progressCB: Optional progress callback forwarded to ``PrintOutput``.

        Raises:
            KeyError: If ``xml_elem`` has no ``attributes`` attribute.
        """
        #For reporting results
        self.printOut = PrintOutput(logCB, progressCB, PROFILING)

        #KNN tuning parameters
        self.k = 10  #Make this 1 more than the number of columns
        self.num_display = 10
        self.num_mod = 1

        #Attributes that are used to make the prediction
        attributes_string = xml_elem.attributes['attributes'].value
        self.attributes = util_get_attribute_list(attributes_string)

        #NOT ACTUALLY USED, JUST MAKES IT SO KNN LIBRARY CAN BE USED
        self.test_attribute = None

        #Sets of attributes that must be considered as a whole
        self.attribute_combinations = []

        #Set all weights to 1
        self.initialized_weights = dict.fromkeys(self.attributes, 1)

        #Attributes that will get their values log transformed to produce
        #better results. Always defined (empty by default) so that reading
        #self.log_trans_atts never raises AttributeError when the XML omits
        #the optional 'log_trans_attributes' attribute.
        self.log_trans_atts = set()
        if xml_elem.hasAttribute('log_trans_attributes'):
            log_trans_string = xml_elem.attributes[
                'log_trans_attributes'].value
            self.log_trans_atts = set(util_get_attribute_list(log_trans_string))

        self.null_value_list = []  #NOT USED

        #Random information
        self.test_type = "LDOF"
示例#3
0
    def __init__(self, xml_elem, MAKE_ALL_PREDS, logCB = None, progressCB = None) :
        """Configure a "Num" test from its XML element.

        Args:
            xml_elem: DOM element describing the test. Must carry
                ``test_attribute`` and ``train_attributes``. Optional
                attributes: ``test_classifier`` / ``classifier``, ``options``,
                ``fs_evaluation_class`` (which then requires
                ``fs_search_class``), ``fs_start_attributes``, ``null_value``.
                May contain a ``null_values`` child holding ``v`` elements.
            MAKE_ALL_PREDS: Stored unchanged on the instance.
            logCB: Optional logging callback forwarded to ``PrintOutput``.
            progressCB: Optional progress callback forwarded to ``PrintOutput``.

        Raises:
            KeyError: If a required XML attribute is missing.
            ValueError: If a null value declared as an int cannot be parsed.
        """
        #For reporting results
        self.printOut = PrintOutput(logCB, progressCB, PROFILING)

        #Test specific information
        self.test_attribute = xml_elem.attributes["test_attribute"].value

        self.test_classifier = "weka.classifiers.lazy.IBk"
        if xml_elem.hasAttribute("test_classifier") :
            #The guard used to test "test_classifier" but then read
            #"classifier", raising KeyError when only the former was present.
            #Keep reading "classifier" when it exists (the one case that
            #worked before) and otherwise fall back to the guarded attribute.
            if xml_elem.hasAttribute("classifier") :
                classifier_key = "classifier"
            else :
                classifier_key = "test_classifier"
            self.test_classifier = xml_elem.attributes[classifier_key].value

        self.test_options = "-I -K 20 -X -A weka.core.neighboursearch.KDTree"
        if xml_elem.hasAttribute("options") :
            self.test_options = xml_elem.attributes["options"].value

        #Feature selection information
        self.use_feature_selection = False
        self.using_pca = False
        self.search_class = ""
        self.evaluation_class = ""
        #Attributes that the search class starts with (Not used with PCA).
        #Defined unconditionally so the attribute exists even when feature
        #selection is switched off.
        self.start_attributes = []
        if xml_elem.hasAttribute('fs_evaluation_class') :
            self.use_feature_selection = True

            self.search_class = xml_elem.attributes["fs_search_class"].value
            self.evaluation_class = xml_elem.attributes["fs_evaluation_class"].value

            #Checking for pca
            if "PrincipalComponents" in self.evaluation_class :
                self.using_pca = True

            if xml_elem.hasAttribute('fs_start_attributes') :
                self.start_attributes = util_get_attribute_list(
                    xml_elem.attributes['fs_start_attributes'].value)

        #Attributes that are used to make the prediction
        attributes_string = xml_elem.attributes["train_attributes"].value
        self.attributes = util_get_attribute_list(attributes_string)

        #Values that are considered null for the target attribute
        self.null_value_list = []
        elements = xml_elem.getElementsByTagName('null_values')
        if len(elements) > 0 :
            #Only the first <null_values> element is consulted
            null_val_element = elements[0]
            for element in null_val_element.getElementsByTagName('v') :

                attribute = element.attributes['attribute'].value
                null_type = element.attributes['type'].value
                value = element.attributes['value'].value
                vt = element.attributes['vt'].value

                null_dict = {"attribute" : attribute, "type" : null_type}

                #Any other 'vt' leaves the entry without a "value" key
                if vt == "int" :
                    null_dict["value"] = int(value)
                elif vt == "string" :
                    null_dict["value"] = str(value)

                self.null_value_list.append(null_dict)

        #Simply defined null values: an integer applied to the test attribute
        if xml_elem.hasAttribute("null_value") :
            null_value = xml_elem.attributes["null_value"].value
            null_dict = {"attribute" : self.test_attribute, "type" : "E", "value" : int(null_value)}
            self.null_value_list.append(null_dict)

        #Random information
        self.test_type = "Num"
        self.MAKE_ALL_PREDS = MAKE_ALL_PREDS
示例#4
0
    def __init__(self, io_info_element, logCB=None, progressCB=None):
        """Build the query manager state from the I/O description element.

        Reads the connection and column settings from ``io_info_element``,
        opens the table, builds the optional query strings, computes the
        spatial boundaries and creates the parcel blocks.

        Args:
            io_info_element: DOM element. Must carry ``input_db_url``,
                ``input_table_name``, ``x_column``, ``y_column`` and
                ``id_column``. Optional attributes: ``force_to_class``,
                ``force_to_numeric``, ``train_block_size``,
                ``test_block_size``, ``num_cv_folds``. Optional child
                elements: ``test_criteria``, ``outlier_inc_set``.
            logCB: Optional logging callback forwarded to ``PrintOutput``.
            progressCB: Optional progress callback forwarded to ``PrintOutput``.

        Raises:
            KeyError: If a required XML attribute is missing.
            ValueError: If a block size or fold count is not an integer.
        """
        #For reporting results
        self.printOut = PrintOutput(logCB, progressCB, PROFILING)

        #Storing all the information passed as parameters to the query manager
        self.db_url = io_info_element.attributes["input_db_url"].value
        self.table_name = io_info_element.attributes["input_table_name"].value
        self.x_attribute = io_info_element.attributes["x_column"].value
        self.y_attribute = io_info_element.attributes["y_column"].value
        self.id_attribute = io_info_element.attributes["id_column"].value

        #Forcing certain attributes to be categorical
        self.fclass_atts = []
        if io_info_element.hasAttribute('force_to_class'):
            self.fclass_atts = util_get_attribute_list(
                io_info_element.attributes["force_to_class"].value)

        #Forcing certain attributes to be numerical
        #(the setting is an XML attribute; the former child-element lookup
        #here was never used and has been dropped)
        self.fnum_atts = []
        if io_info_element.hasAttribute('force_to_numeric'):
            self.fnum_atts = util_get_attribute_list(
                io_info_element.attributes["force_to_numeric"].value)

        #Size of blocks that will be created
        self.train_size = 40000
        if io_info_element.hasAttribute("train_block_size"):
            self.train_size = int(
                io_info_element.attributes["train_block_size"].value)

        self.test_size = 40000
        if io_info_element.hasAttribute("test_block_size"):
            self.test_size = int(
                io_info_element.attributes["test_block_size"].value)

        #Getting access to the table
        self.table = util_get_table(self.db_url, self.table_name)

        #Getting all attributes from the table
        #Getting what types of attributes they are
        (self.class_list, self.numeric_list,
         self.attributes) = util_get_attribute_info(self.table,
                                                    self.fclass_atts,
                                                    self.fnum_atts)

        #Used for the parcel query; stays True when no <test_criteria>
        #element is present
        self.query_string = True
        elements = io_info_element.getElementsByTagName('test_criteria')
        if len(elements) > 0:
            tc_elem = elements[0]
            self.query_string = self.util_create_query_string(tc_elem)

        #Used for extreme rows that are included in every test done.
        #Only built when <outlier_inc_set> contains at least one <or> element.
        self.ois_query_string = None
        elements = io_info_element.getElementsByTagName('outlier_inc_set')
        if len(elements) > 0:
            ois_elem = elements[0]
            if len(ois_elem.getElementsByTagName('or')) > 0:
                self.ois_query_string = self.util_create_query_string(ois_elem)

        #Getting x/y boundaries of the parcels and number of rows
        #(may want to find a faster way to do this)
        (self.x_max, self.y_max, self.x_min, self.y_min,
         self.total_count) = self.util_spatial_boundaries()

        self.rows_left = self.total_count

        #Information that is being stored about the number of parcel blocks remaining and used
        self.printOut.pLog("RET- Creating all parcel blocks...")

        self.block_list = self.util_create_parcel_block(
            self.x_max, self.y_max, self.x_min, self.y_min)
        self.set_colors()
        self.used_blocks = []

        #In order to make sure max, min vals didn't leave any out
        #Can happen if x and y attributes are varchars in metadata
        self.adjust_borders()

        #Used for profiling the speed at which the program is running
        self.first_query_time = None
        self.number_rows_tested = 0

        self.table_current_test_rows = []

        #Parcel block information
        self.current_test_block = None
        self.current_training_block = None
        #Both counters default to 2 and follow 'num_cv_folds' when it is given
        self.group_max = 2
        self.group_count = 2
        if io_info_element.hasAttribute('num_cv_folds'):
            self.group_max = int(
                io_info_element.attributes['num_cv_folds'].value)
            self.group_count = self.group_max

        self.overall_is_test_list = []
        self.use_as_training = []

        #Current rows retrieved
        self.current_rows = []
        self.is_test_list = []
        self.is_null_list = []
        self.test_number = []