def init(data, k, QI_num=-1):
    """
    Reset the module-level globals used by the anonymization run.

    Parameters:
        data: sequence of records; each record is indexed by column
            position. (The original author's note mentions a 30162 x 9
            dataset, i.e. 8 attributes plus one trailing column.)
        k: stored in GL_K.
        QI_num: number of leading columns to treat as quasi-identifier
            attributes. A value <= 0 means "every column of the first
            record except the last one".

    Globals written:
        QI_LEN   - number of attributes considered.
        GL_K     - the k parameter.
        RESULT   - reset to an empty list.
        QI_DICT  - per attribute, a value -> index lookup table into
                   the sorted domain.
        QI_ORDER - per attribute, the sorted list of distinct values
                   (the attribute's domain).
        QI_RANGE - per attribute, value(largest) - value(smallest).
    """
    global GL_K, RESULT, QI_LEN, QI_DICT, QI_RANGE, QI_ORDER
    QI_LEN = len(data[0]) - 1 if QI_num <= 0 else QI_num
    GL_K = k
    RESULT = []
    # Static per-attribute tables, rebuilt from scratch on every call.
    QI_DICT = [dict() for _ in range(QI_LEN)]
    QI_ORDER = []
    QI_RANGE = []
    # Collect the distinct values that occur in each attribute column.
    domains = [set() for _ in range(QI_LEN)]
    for row in data:
        for col in range(QI_LEN):
            domains[col].add(row[col])
    for col, domain in enumerate(domains):
        ordered = sorted(domain, key=cmp_to_key(cmp_value))
        # Range is the distance between the largest and smallest value.
        QI_RANGE.append(value(ordered[-1]) - value(ordered[0]))
        QI_ORDER.append(list(ordered))
        lookup = QI_DICT[col]
        for position, qi_value in enumerate(ordered):
            lookup[qi_value] = position
def init(data, k, QI_num=-1):
    """
    Re-initialize the module globals from a dataset.

    data is a sequence of indexable records, k is stored in GL_K, and
    QI_num is the count of leading columns to use as attributes (when
    it is <= 0, all columns of the first record but the last are used).

    After the call, for every attribute i in range(QI_LEN):
        QI_ORDER[i] is the sorted list of distinct values in column i,
        QI_DICT[i] maps each of those values to its index in QI_ORDER[i],
        QI_RANGE[i] is value(last sorted value) - value(first sorted value).
    RESULT is reset to an empty list.
    """
    global GL_K, RESULT, QI_LEN, QI_DICT, QI_RANGE, QI_ORDER
    if QI_num <= 0:
        QI_LEN = len(data[0]) - 1
    else:
        QI_LEN = QI_num
    GL_K = k
    RESULT = []
    # Static values: one entry per attribute in each of these tables.
    QI_DICT = [{} for _ in range(QI_LEN)]
    QI_ORDER = []
    QI_RANGE = []
    seen_values = [set() for _ in range(QI_LEN)]
    for record in data:
        for column in range(QI_LEN):
            seen_values[column].add(record[column])
    sort_key = cmp_to_key(cmp_value)
    for index_table, seen in zip(QI_DICT, seen_values):
        sorted_values = sorted(seen, key=sort_key)
        smallest, largest = sorted_values[0], sorted_values[-1]
        QI_RANGE.append(value(largest) - value(smallest))
        QI_ORDER.append(list(sorted_values))
        for rank, qi_value in enumerate(sorted_values):
            index_table[qi_value] = rank
def get_normalized_width(partition, index):
    """
    Return the width of partition along attribute `index`, divided by
    that attribute's full range (similar to NCP).

    partition.high[index] and partition.low[index] are positions into
    QI_ORDER[index]; the width is the difference of value() applied to
    the entries at those positions. When the width equals the full
    range the integer 1 is returned without dividing.
    """
    domain = QI_ORDER[index]
    upper = value(domain[partition.high[index]])
    lower = value(domain[partition.low[index]])
    span = upper - lower
    full_range = QI_RANGE[index]
    # The equality shortcut skips the division when span == full_range.
    return 1 if span == full_range else span * 1.0 / full_range
def get_normalized_width(partition, index):
    """
    Compute the normalized width of a partition on one attribute
    (similar to NCP).

    The partition's low/high entries for `index` are looked up in
    QI_ORDER[index], converted with value(), and their difference is
    divided by QI_RANGE[index]. If the difference equals the range,
    1 is returned directly.
    """
    ordered_values = QI_ORDER[index]
    high_value = value(ordered_values[partition.high[index]])
    low_value = value(ordered_values[partition.low[index]])
    width = high_value - low_value
    attribute_range = QI_RANGE[index]
    if width != attribute_range:
        return width * 1.0 / attribute_range
    # Partition spans the entire domain of this attribute.
    return 1