def __init__(self, query_manager, logCB=None, progressCB=None):
    """Create the reporter, reset all per-run state and load possible values.

    Args:
        query_manager: Kept on the instance as ``self.query_manager`` and
            handed to ``self.get_possible_values``.
        logCB: Optional log callback, forwarded to ``PrintOutput``.
        progressCB: Optional progress callback, forwarded to ``PrintOutput``.
    """
    # Reporting of results
    self.printOut = PrintOutput(logCB, progressCB, PROFILING)

    self.query_manager = query_manager

    # Profile of the information currently being dealt with. These are
    # cleared first and get_possible_values() is called afterwards, so the
    # order of the next two statements must not be swapped.
    # NOTE(review): get_possible_values presumably fills these in - confirm.
    self.class_result_dict = self.class_att_value_weight = None
    self.numeric_result_dict = None
    self.get_possible_values(query_manager)

    # Used by SVM_model to piece together results
    self.label_id_lookup_table = None

    # Data currently being stored (three independent lists)
    self.labels, self.samples, self.is_null_list = [], [], []

    # Used by KNN
    self.log_trans_atts = set()
    self.attribute_id_list = []
    self.attribute_id_dict, self.id_attribute_dict = {}, {}
def __init__(self, xml_elem, logCB=None, progressCB=None):
    """Configure an "LDOF" test from its XML element.

    Args:
        xml_elem: DOM element describing the test. Its required
            ``attributes`` XML attribute is parsed with
            ``util_get_attribute_list``; the optional
            ``log_trans_attributes`` XML attribute is parsed the same way.
        logCB: Optional log callback, forwarded to ``PrintOutput``.
        progressCB: Optional progress callback, forwarded to ``PrintOutput``.
    """
    # For reporting results
    self.printOut = PrintOutput(logCB, progressCB, PROFILING)

    # KNN tuning parameters
    self.k = 10  # Make this 1 more than the number of columns
    self.num_display = 10
    self.num_mod = 1

    # Attributes that are used to make the prediction
    attributes_string = xml_elem.attributes['attributes'].value
    self.attributes = util_get_attribute_list(attributes_string)

    # NOT ACTUALLY USED, JUST MAKES IT SO KNN LIBRARY CAN BE USED
    self.test_attribute = None

    # Sets of attributes that must be considered as a whole
    self.attribute_combinations = []

    # Every attribute starts with a weight of 1
    self.initialized_weights = dict.fromkeys(self.attributes, 1)

    # Attributes that will get their values log transformed to produce
    # better results. Default to an empty set so the attribute always
    # exists, even when the XML element does not configure it (previously
    # it was left undefined in that case).
    self.log_trans_atts = set()
    if xml_elem.hasAttribute('log_trans_attributes'):
        log_trans_string = xml_elem.attributes['log_trans_attributes'].value
        self.log_trans_atts = set(util_get_attribute_list(log_trans_string))

    self.null_value_list = []  # NOT USED

    # Random information
    self.test_type = "LDOF"
def __init__(self, xml_elem, MAKE_ALL_PREDS, logCB=None, progressCB=None):
    """Configure a "Num" test from its XML element.

    XML attributes read from ``xml_elem``:
        test_attribute (required), train_attributes (required),
        test_classifier / classifier (optional), options (optional),
        fs_evaluation_class + fs_search_class (optional, enable feature
        selection), fs_start_attributes (optional), null_value (optional).
    Child elements read: ``<null_values>`` containing ``<v>`` entries with
    ``attribute``, ``type``, ``value`` and ``vt`` XML attributes.

    Args:
        xml_elem: DOM element describing the test.
        MAKE_ALL_PREDS: Stored unchanged as ``self.MAKE_ALL_PREDS``.
        logCB: Optional log callback, forwarded to ``PrintOutput``.
        progressCB: Optional progress callback, forwarded to ``PrintOutput``.

    Raises:
        ValueError: If a null value declared as ``int`` (or the simple
            ``null_value`` attribute) cannot be converted with ``int()``.
    """
    # For reporting results
    self.printOut = PrintOutput(logCB, progressCB, PROFILING)

    # Test specific information
    self.test_attribute = xml_elem.attributes["test_attribute"].value

    self.test_classifier = "weka.classifiers.lazy.IBk"
    if xml_elem.hasAttribute("test_classifier"):
        # The guard checks "test_classifier" while the value used to be read
        # from "classifier" only, which failed when just "test_classifier"
        # was supplied. Keep preferring "classifier" (the old behaviour when
        # both exist) and fall back to "test_classifier" otherwise.
        if xml_elem.hasAttribute("classifier"):
            classifier_key = "classifier"
        else:
            classifier_key = "test_classifier"
        self.test_classifier = xml_elem.attributes[classifier_key].value

    self.test_options = "-I -K 20 -X -A weka.core.neighboursearch.KDTree"
    if xml_elem.hasAttribute("options"):
        self.test_options = xml_elem.attributes["options"].value

    # Feature selection information
    self.use_feature_selection = False
    self.using_pca = False
    self.search_class = ""
    self.evaluation_class = ""
    if xml_elem.hasAttribute('fs_evaluation_class'):
        self.use_feature_selection = True
        self.search_class = xml_elem.attributes["fs_search_class"].value
        self.evaluation_class = xml_elem.attributes["fs_evaluation_class"].value

        # Checking for pca
        if "PrincipalComponents" in self.evaluation_class:
            self.using_pca = True

    # Attributes that the search class starts with (Not used with PCA).
    # Always initialised so the attribute exists without feature selection.
    self.start_attributes = []
    if xml_elem.hasAttribute('fs_start_attributes'):
        self.start_attributes = util_get_attribute_list(
            xml_elem.attributes['fs_start_attributes'].value)

    # Attributes that are used to make the prediction
    attributes_string = xml_elem.attributes["train_attributes"].value
    self.attributes = util_get_attribute_list(attributes_string)

    # Values that are considered null for the target attribute
    self.null_value_list = []
    null_value_elements = xml_elem.getElementsByTagName('null_values')
    if len(null_value_elements) > 0:
        # Only the first <null_values> element is consulted.
        for element in null_value_elements[0].getElementsByTagName('v'):
            attribute = element.attributes['attribute'].value
            null_type = element.attributes['type'].value
            value = element.attributes['value'].value
            vt = element.attributes['vt'].value

            null_dict = {"attribute": attribute, "type": null_type}
            if vt == "int":
                null_dict["value"] = int(value)
            elif vt == "string":
                null_dict["value"] = str(value)
            # Any other vt leaves the entry without a "value" key; the
            # entry is still recorded, as before.
            self.null_value_list.append(null_dict)

    # Simply defined null values
    if xml_elem.hasAttribute("null_value"):
        null_value = xml_elem.attributes["null_value"].value
        self.null_value_list.append({
            "attribute": self.test_attribute,
            "type": "E",
            "value": int(null_value),
        })

    # Random information
    self.test_type = "Num"
    self.MAKE_ALL_PREDS = MAKE_ALL_PREDS
def __init__(self, io_info_element, logCB=None, progressCB=None):
    """Build the query manager from the I/O configuration element.

    XML attributes read from ``io_info_element``:
        input_db_url, input_table_name, x_column, y_column, id_column
        (all required); force_to_class, force_to_numeric,
        train_block_size, test_block_size, num_cv_folds (all optional).
    Child elements read: ``<test_criteria>`` and ``<outlier_inc_set>``
    (first occurrence of each only).

    The statements below are order dependent: the table and column names
    are stored before the ``util_*`` helpers and instance methods that are
    invoked later in this constructor.

    Args:
        io_info_element: DOM element holding the input configuration.
        logCB: Optional log callback, forwarded to ``PrintOutput``.
        progressCB: Optional progress callback, forwarded to ``PrintOutput``.
    """
    # For reporting results
    self.printOut = PrintOutput(logCB, progressCB, PROFILING)

    # Storing all the information passed as parameters to the query manager
    self.db_url = io_info_element.attributes["input_db_url"].value
    self.table_name = io_info_element.attributes["input_table_name"].value
    self.x_attribute = io_info_element.attributes["x_column"].value
    self.y_attribute = io_info_element.attributes["y_column"].value
    self.id_attribute = io_info_element.attributes["id_column"].value

    # Forcing certain attributes to be categorical
    self.fclass_atts = []
    if io_info_element.hasAttribute('force_to_class'):
        self.fclass_atts = util_get_attribute_list(
            io_info_element.attributes["force_to_class"].value)

    # Forcing certain attributes to be numerical. (An unused child-element
    # lookup for 'force_to_numeric' used to sit here; only the XML attribute
    # was ever consulted, so the dead lookup has been dropped.)
    self.fnum_atts = []
    if io_info_element.hasAttribute('force_to_numeric'):
        self.fnum_atts = util_get_attribute_list(
            io_info_element.attributes["force_to_numeric"].value)

    # Size of blocks that will be created
    self.train_size = 40000
    if io_info_element.hasAttribute("train_block_size"):
        self.train_size = int(
            io_info_element.attributes["train_block_size"].value)
    self.test_size = 40000
    if io_info_element.hasAttribute("test_block_size"):
        self.test_size = int(
            io_info_element.attributes["test_block_size"].value)

    # Getting access to the table
    self.table = util_get_table(self.db_url, self.table_name)

    # Getting all attributes from the table and what types they are
    (self.class_list, self.numeric_list, self.attributes) = \
        util_get_attribute_info(self.table, self.fclass_atts, self.fnum_atts)

    # Used for the parcel query.
    # NOTE(review): the default of True presumably acts as a
    # "match everything" filter - confirm against the query code.
    self.query_string = True
    criteria_elements = io_info_element.getElementsByTagName('test_criteria')
    if len(criteria_elements) > 0:
        self.query_string = self.util_create_query_string(criteria_elements[0])

    # Used for extreme rows that are included in every test done. Only
    # built when the outlier element contains at least one <or> element.
    self.ois_query_string = None
    outlier_elements = io_info_element.getElementsByTagName('outlier_inc_set')
    if len(outlier_elements) > 0:
        ois_elem = outlier_elements[0]
        if len(ois_elem.getElementsByTagName('or')) > 0:
            self.ois_query_string = self.util_create_query_string(ois_elem)

    # Getting x/y boundaries of the parcels and number of rows
    # (may want to find a faster way to do this)
    (self.x_max, self.y_max, self.x_min, self.y_min,
     self.total_count) = self.util_spatial_boundaries()
    self.rows_left = self.total_count

    # Information that is being stored about the number of parcel blocks
    # remaining and used
    self.printOut.pLog("RET- Creating all parcel blocks...")
    self.block_list = self.util_create_parcel_block(
        self.x_max, self.y_max, self.x_min, self.y_min)
    self.set_colors()
    self.used_blocks = []

    # In order to make sure max, min vals didn't leave any out.
    # Can happen if x and y attributes are varchars in metadata.
    self.adjust_borders()

    # Used for profiling the speed at which the program is running
    self.first_query_time = None
    self.number_rows_tested = 0
    self.table_current_test_rows = []

    # Parcel block information
    self.current_test_block = None
    self.current_training_block = None

    # Number of cross-validation folds: 2 unless num_cv_folds overrides it.
    # group_count always starts out equal to group_max.
    self.group_max = 2
    if io_info_element.hasAttribute('num_cv_folds'):
        self.group_max = int(
            io_info_element.attributes['num_cv_folds'].value)
    self.group_count = self.group_max

    self.overall_is_test_list = []
    self.use_as_training = []

    # Current rows retrieved
    self.current_rows = []
    self.is_test_list = []
    self.is_null_list = []
    self.test_number = []