예제 #1
0
def merge_loop(double_set, list_name, file=None):
    """
    Repeatedly merge groups until no further merge is possible.

    Each round records the current collection (size of its first group,
    number of groups, and the collection itself) and then asks
    ``merge_group`` for the next collection. The loop ends as soon as the
    current collection is empty; the last non-empty collection is the result.

    :param double_set: initial collection of groups. It is also passed
        unchanged to every ``merge_group`` call as the second argument.
        (Presumably the pairs built by ``create_double_set`` -- confirm
        against callers.)
    :param list_name: name table forwarded to ``utils.num_2_word`` when the
        history is exported.
    :param file: optional CSV path. When given, the per-round history is
        written there and additionally pickled to ``file + '.pkl'``.
    :return: ``(size, best_set)`` where ``best_set`` is the last non-empty
        collection and ``size`` is the length of its first group.
        Returns ``(0, set())`` when ``double_set`` is empty.
    """
    best_set = set()
    current = double_set
    num_list = []
    count_list = []
    group_list = []
    while len(current) > 0:
        # Peek at one group without materialising the whole collection.
        member_count = len(next(iter(current)))
        print('成员数:', member_count)
        print('个数:', len(current))
        print(current)
        num_list.append(member_count)
        count_list.append(len(current))
        group_list.append(current)
        best_set = current
        current = merge_group(current, double_set)
    if file is not None:
        group_list = utils.num_2_word(list_name, group_list)
        utils.write_csv(['成员数', '个数', '团'], file, num_list, count_list,
                        group_list)
        utils.save_pickle(file + '.pkl', group_list)
    if len(best_set) == 0:
        # Nothing was ever merged (empty input): there is no group to measure.
        return 0, best_set
    return len(next(iter(best_set))), best_set
def calculate(series, list_name, group_list_word):
    """
    Match every entry of ``series`` against the known groups and export it.

    The groups are converted with ``utils.word_2_num`` and the series with
    ``series_2_list``. For each resulting entry, every group for which
    ``utils.is_in(group, entry)`` is truthy is collected. Entries and their
    matched groups are then converted back with ``utils.num_2_word`` and
    written to ``data/pattern.csv``.

    :param series: input forwarded to ``series_2_list``.
    :param list_name: name table used by the word/number conversions.
    :param group_list_word: groups in word form.
    :return: None; the result is only written to the CSV file.
    """
    numeric_groups = utils.word_2_num(list_name, group_list_word)
    # The second value returned by series_2_list is not needed here.
    entries, _ = series_2_list(series, list_name)
    matched = [
        [grp for grp in numeric_groups if utils.is_in(grp, entry)]
        for entry in entries
    ]
    entries = utils.num_2_word(list_name, entries)
    matched = utils.num_2_word(list_name, matched)
    utils.write_csv(['主治', '功能团'], 'data/pattern.csv', entries, matched)
예제 #3
0
def cluster_main2(relatives_list, list_name):
    """
    Run the group-merging pipeline for several cut sizes, then export the
    relatives of every name.

    For each ``group_num`` in 3..8 the de-duplicated, number-encoded list is
    cut with ``utils.cut_by_num``, filtered by ``del_by_correlation``, turned
    into pairs with ``create_double_set`` and merged with ``merge_loop``
    (history written to ``data/group<group_num>.csv``). The ratio
    ``max_num / group_num`` is printed for each size.

    Afterwards, for every row ``i`` of ``relatives_list`` the members of each
    pair that differ from ``list_name[i]`` are collected and written to
    ``data/relatives.csv``.

    :param relatives_list: one row per name; each row is iterated as pairs
        indexable with ``[0]`` and ``[1]``.
    :param list_name: names, indexed in parallel with ``relatives_list``.
    :return: None.
    """
    list_qyt = duplicate_removal(relatives_list, list_name)
    # Replace the items in the list with numbers.
    list_num = utils.word_2_num(list_name, list_qyt)
    for group_num in range(3, 9):
        new_list = utils.cut_by_num(list_num, group_num)
        list_num2 = del_by_correlation(new_list)
        # The result is unused; the call is kept in case num_2_word has side
        # effects -- TODO confirm and remove.
        utils.num_2_word(list_name, list_num2)
        # Build the pairs.
        doubleSet = create_double_set(list_num2)
        max_num, _ = merge_loop(doubleSet, list_name,
                                'data/group' + str(group_num) + '.csv')
        # Information utilisation ratio.
        print(max_num, '/', group_num, '=', max_num / group_num)
    # The original appended to ``relative_list`` without ever creating it,
    # so it is built locally here, one entry per row.
    relative_list = []
    for i, pairs in enumerate(relatives_list):
        name = list_name[i]
        others = []
        for pair in pairs:
            if name != pair[0]:
                others.append(pair[0])
            if name != pair[1]:
                others.append(pair[1])
        relative_list.append(others)
    utils.write_csv(['症状', '亲友团'], 'data/relatives.csv', list_name,
                    relative_list)


if __name__ == "__main__":
    # Script entry point: compute correlation coefficients for the stored
    # combinations, export them sorted to rel2.csv, then derive and pickle
    # the relatives list.

    # Two calls on tiny hand-written inputs with the pair order swapped;
    # dd and dd2 are not used anywhere below.
    dd = calculate_correlation([(0, 1)], [0.1], [0.2, 0.3])
    dd2 = calculate_correlation([(1, 0)], [0.1], [0.2, 0.3])
    # Load the inputs through utils.load_pickle.
    list_name = utils.load_pickle('list_name.txt')
    list_fre = utils.load_pickle('list_fre.txt')
    combinations_list = utils.load_pickle('combinations_list.txt')
    combinations_fre = utils.load_pickle('combinations_fre.txt')
    correlation = calculate_correlation(combinations_list, combinations_fre,
                                        list_fre)
    # Earlier variant, kept for reference:
    # combinations_name = comb_names(list_name,combinations_list)
    combinations_name = utils.num_2_word(list_name, combinations_list)
    # Two-column table (combination, coefficient), highest coefficient first.
    column_1 = pd.Series(combinations_name, name='组合')
    column_2 = pd.Series(correlation, name='关联度系数')
    data = pd.concat([column_1, column_2], axis=1)
    data = data.sort_values(by='关联度系数', ascending=False)
    data.to_csv('rel2.csv', index=False, encoding='utf-8')
    # Abandoned approach, superseded by relatives_2:
    # relatives_list = relatives(list_name, data, 5)
    relatives_list = relatives_2(list_name, data, 8)
    utils.save_pickle('relatives_list.txt', relatives_list)