def merge_loop(double_set, list_name, file=None):
    """Merge groups round after round until no further merge is possible.

    Each round prints and records the current collection of groups, then
    asks ``merge_group`` for the next collection. The loop ends as soon as
    the collection is empty; the last non-empty collection is the result.

    :param double_set: initial collection of groups; may be empty.
    :param list_name: names handed to ``utils.num_2_word`` to convert the
        recorded groups before they are written out (only used when
        ``file`` is given).
    :param file: optional CSV path. When given, the per-round statistics
        are written to it and the converted groups are pickled to
        ``file + '.pkl'``.
    :return: ``(size, groups)`` where ``groups`` is the final collection
        and ``size`` is the member count of its first group (the original
        docstring calls this the maximum group size; presumably all groups
        of one round have the same size). ``(0, set())`` is returned when
        ``double_set`` is empty.
    """
    best_set = set()
    current = double_set
    num_list = []
    count_list = []
    group_list = []
    while len(current) > 0:
        # Size of one (the first) group; next(iter(...)) picks the same
        # element as list(...)[0] without copying the whole collection.
        member_count = len(next(iter(current)))
        print('成员数:', member_count)
        print('个数:', len(current))
        print(current)
        num_list.append(member_count)
        count_list.append(len(current))
        group_list.append(current)
        best_set = current
        current = merge_group(current, double_set)
    if file is not None:
        group_list = utils.num_2_word(list_name, group_list)
        utils.write_csv(['成员数', '个数', '团'], file, num_list, count_list, group_list)
        utils.save_pickle(file + '.pkl', group_list)
    if len(best_set) == 0:
        # Nothing was ever merged (empty input): there is no group to
        # measure, so report size 0 instead of raising IndexError.
        return 0, best_set
    return len(next(iter(best_set))), best_set
def calculate(series, list_name, group_list_word):
    """Record, for every prescription, the groups that match it.

    The groups are converted to their numeric form, every entry produced by
    ``series_2_list`` is checked against every group with ``utils.is_in``,
    and the entries together with their matching groups are converted back
    to words and written to ``data/pattern.csv``.

    :param series: input handed to ``series_2_list``.
    :param list_name: names used for the word/number conversions.
    :param group_list_word: groups given as words.
    :return: None; the result is written to ``data/pattern.csv``.
    """
    numeric_groups = utils.word_2_num(list_name, group_list_word)
    # The second value returned by series_2_list is not needed here.
    entries, _index_list = series_2_list(series, list_name)

    # One list of matching groups per entry, in entry order.
    matches = [
        [candidate for candidate in numeric_groups if utils.is_in(candidate, entry)]
        for entry in entries
    ]

    entries_as_words = utils.num_2_word(list_name, entries)
    matches_as_words = utils.num_2_word(list_name, matches)
    utils.write_csv(
        ['主治', '功能团'],
        'data/pattern.csv',
        entries_as_words,
        matches_as_words,
    )
def cluster_main2(relatives_list, list_name):
    """Run the group-merging pipeline for the group sizes 3 through 8.

    The relatives list is de-duplicated and converted to numbers once. For
    each ``group_num`` it is then cut with ``utils.cut_by_num``, filtered
    with ``del_by_correlation``, turned into two-element groups and merged
    with ``merge_loop``, which writes ``data/group<group_num>.csv``.
    Finally the ratio ``max_num / group_num`` is printed.

    :param relatives_list: input handed to ``duplicate_removal``.
    :param list_name: names used for the word/number conversions.
    :return: None.
    """
    deduplicated = duplicate_removal(relatives_list, list_name)
    # Replace the items in the list with numbers.
    numeric_list = utils.word_2_num(list_name, deduplicated)

    for group_num in range(3, 9):
        trimmed = utils.cut_by_num(numeric_list, group_num)
        filtered = del_by_correlation(trimmed)
        # Word form of the filtered list; not used further in this function.
        _filtered_words = utils.num_2_word(list_name, filtered)
        # Build the two-element groups.
        pair_set = create_double_set(filtered)
        csv_path = 'data/group{}.csv'.format(group_num)
        max_num, _final_groups = merge_loop(pair_set, list_name, csv_path)
        # Information utilisation ratio.
        ratio = max_num / group_num
        print(max_num, '/', group_num, '=', ratio)
# NOTE(review): the loop below is the tail of a definition whose header lies
# outside the visible source; `relative_list` is created there (presumably one
# list per entry of `list_name` -- confirm against the missing part).
# For every entry, append each member of each pair that differs from the
# entry's own name.
for i, item in enumerate(relatives_list):
    name = list_name[i]
    for j in item:
        if (name != j[0]):
            relative_list[i].append(j[0])
        if (name != j[1]):
            relative_list[i].append(j[1])
# Write the names next to the lists collected above.
utils.write_csv(['症状', '亲友团'], 'data/relatives.csv', list_name, relative_list)


if __name__ == "__main__":
    # Two calls with the same values but the pair given in both orders;
    # dd and dd2 are not used again in the visible code (presumably a manual
    # sanity check -- confirm).
    dd = calculate_correlation([(0, 1)], [0.1], [0.2, 0.3])
    dd2 = calculate_correlation([(1, 0)], [0.1], [0.2, 0.3])
    # Load the inputs through utils.load_pickle.
    list_name = utils.load_pickle('list_name.txt')
    list_fre = utils.load_pickle('list_fre.txt')
    combinations_list = utils.load_pickle('combinations_list.txt')
    combinations_fre = utils.load_pickle('combinations_fre.txt')
    # One correlation value per combination (it is used as a column next to
    # the combination names below).
    correlation = calculate_correlation(combinations_list, combinations_fre, list_fre)
    # combinations_name = comb_names(list_name,combinations_list)
    combinations_name = utils.num_2_word(list_name, combinations_list)
    # Put names and correlation values side by side as two named columns
    # (`pd` is presumably pandas, imported outside the visible source).
    column_1 = pd.Series(combinations_name, name='组合')
    column_2 = pd.Series(correlation, name='关联度系数')
    data = pd.concat([column_1, column_2], axis=1)
    # Highest correlation first.
    data = data.sort_values(by='关联度系数', ascending=False)
    data.to_csv('rel2.csv', index=False, encoding='utf-8')
    # relatives_list = relatives(list_name, data, 5)  # this method was abandoned
    relatives_list = relatives_2(list_name, data, 8)
    utils.save_pickle('relatives_list.txt', relatives_list)