def process_blank(readcsvname, writecsvname):
    """Drop empty cells, split the remaining cells on the wide space, and save.

    Rows are read with ``data_process.read_csv(readcsvname)``. For every row,
    cells equal to ``''`` are discarded and every other cell is split on the
    separator defined below; the pieces are collected in order into one list
    per input row. The lists are written with
    ``data_process.write_in_csv(writecsvname, ...)``.

    Args:
        readcsvname: Path of the CSV file to read.
        writecsvname: Path of the CSV file to write.
    """
    print('process_blank')
    # These three escapes spell the UTF-8 encoding of U+3000 (ideographic
    # space). NOTE(review): this only matches if cells are byte strings, as
    # under Python 2 -- confirm against data_process.read_csv.
    separator = '\xe3\x80\x80'
    datas = []
    for row in data_process.read_csv(readcsvname):
        pieces = []
        for cell in row:
            if cell == '':
                # Skip the blank instead of popping it from the row that is
                # being iterated: popping shifted the row and silently
                # dropped the cell that followed every blank.
                continue
            # split() always yields at least one piece, so a cell without
            # the separator is carried over unchanged.
            pieces.extend(cell.split(separator))
        datas.append(pieces)
    data_process.write_in_csv(writecsvname, datas)
Exemplo n.º 2
0
def pickData(readcsvname1, readcsvname2, function,
             preswritename='Apriori_QFCS_Prescription.csv',
             funcwritename='Apriori_QFCS_Function.csv'):
    """Keep the rows whose efficacy text mentions ``function`` and save them.

    Rows of ``readcsvname2`` are paired by position with rows of
    ``readcsvname1``. A row of ``readcsvname2`` is selected when at least one
    of its cells, decoded as UTF-8, contains ``function``; the row at the same
    position in ``readcsvname1`` is selected with it. Each selected row is
    stored once, even when several of its cells match.

    Args:
        readcsvname1: CSV whose rows are kept alongside the matches
            (prescriptions, judging by the variable names).
        readcsvname2: CSV whose cells are searched for ``function``.
        function: Keyword to look for.
        preswritename: Output CSV for the selected ``readcsvname1`` rows.
        funcwritename: Output CSV for the selected ``readcsvname2`` rows.

    Raises:
        IndexError: If a matching row has no counterpart at the same position
            in ``readcsvname1``.
    """
    print('pickData')
    presdatalist = list(data_process.read_csv(readcsvname1))
    funcdata = data_process.read_csv(readcsvname2)

    preslist = []
    funclist = []
    for num, item in enumerate(funcdata):
        if num == 0 and item:
            # Strip a UTF-8 byte-order mark from the very first cell.
            # NOTE(review): the literal here used to render as an empty
            # string, which made the call a no-op; the "first cell of the
            # first row" placement suggests a BOM strip -- confirm.
            item[0] = item[0].replace('\xef\xbb\xbf', '')
        # any() stops at the first matching cell, so a row with several
        # matching cells is no longer appended more than once.
        if any(function in itemdata.decode('utf8') for itemdata in item):
            funclist.append(item)
            preslist.append(presdatalist[num])

    print('功效 %s 的样本集大小为:%d' % (function, len(funclist)))

    # Output names are parameters so that switching to another efficacy does
    # not require editing this function.
    data_process.write_in_csv(preswritename, preslist)
    data_process.write_in_csv(funcwritename, funclist)
def composition_process(readcsvname, writecsvname):
    """Split the first cell of every row on single spaces and save the tokens.

    Rows are read with ``data_process.read_csv(readcsvname)``. Only the first
    cell of each row is used: it is split on ``' '`` and the empty tokens
    (produced by leading, trailing or repeated spaces) are dropped. One token
    list per input row is written with
    ``data_process.write_in_csv(writecsvname, ...)``.

    Args:
        readcsvname: Path of the CSV file to read.
        writecsvname: Path of the CSV file to write.

    Raises:
        IndexError: If a row has no cells.
    """
    print('composition_process')
    datas = []
    for row in data_process.read_csv(readcsvname):
        # Filter into a new list. Popping from the list while iterating over
        # it skipped the token after each removal, so runs of blanks were
        # only partially removed.
        datas.append([token for token in row[0].split(' ') if token != ''])
    data_process.write_in_csv(writecsvname, datas)
Exemplo n.º 4
0
def _mentions_any(row, terms):
    """Return True if any UTF-8 decoded cell of ``row`` contains one of ``terms``."""
    for cell in row:
        text = cell.decode('utf8')
        if any(term in text for term in terms):
            return True
    return False


def pickFunction(readcsvname1, readcsvname2, writecsvname1, writecsvname2,
                 function, synonyms=('清热', '解毒')):
    """Build a positive/negative sample set for one efficacy keyword.

    Rows of ``readcsvname1`` are paired by position with rows of
    ``readcsvname2``.

    * Positives: every row of ``readcsvname1`` with a cell containing
      ``function`` whose paired row has at least 3 cells.
    * Negatives: taken from every 50th row (positions 0, 50, 100, ...). A row
      qualifies when none of its cells contains ``function`` or any of
      ``synonyms`` and its paired row has more than 3 cells. Collection stops
      once the number of negatives reaches 1.3 times the number of positives.

    Positives come first in both outputs, followed by the negatives.

    Args:
        readcsvname1: CSV whose cells are searched (efficacy text, judging by
            the variable names).
        readcsvname2: CSV of the paired rows (prescriptions, judging by the
            variable names).
        writecsvname1: Output CSV for the selected ``readcsvname1`` rows.
        writecsvname2: Output CSV for the selected ``readcsvname2`` rows.
        function: Efficacy keyword.
        synonyms: Extra terms that disqualify a row from being a negative.
            The default keeps the two terms that were previously hard-coded.

    Raises:
        IndexError: If a candidate row has no counterpart at the same
            position in ``readcsvname2``.
    """
    print('pickFunction')
    funclist = list(data_process.read_csv(readcsvname1))
    preslist = list(data_process.read_csv(readcsvname2))

    finalpreslist = []
    finalfunclist = []
    print("要选择的方剂功效为 %s:" % function)
    print("正在进行中.....")
    for num, item in enumerate(funclist):
        if _mentions_any(item, (function,)) and len(preslist[num]) >= 3:
            finalfunclist.append(item)
            finalpreslist.append(preslist[num])
    print("功效%s 在5W数据集的方剂中找到含有该功效方剂 %d 条。" % (function, len(finalfunclist)))
    print("收集负例中.....(设置负例为正例个数的1.3倍)")

    neglength = len(finalfunclist) * 1.3
    excluded = (function,) + tuple(synonyms)
    count = 0
    # Only every 50th row is considered as a negative candidate.
    for num in range(0, len(funclist), 50):
        # ">=" rather than ">": the counter is never pushed past the quota,
        # so with a whole-number quota the old test never fired and the scan
        # kept running over the rest of the data for nothing.
        if count >= neglength:
            break
        item = funclist[num]
        if _mentions_any(item, excluded):
            continue
        if len(preslist[num]) > 3:
            finalfunclist.append(item)
            finalpreslist.append(preslist[num])
            count += 1

    print("功效%s 在5W数据集的方剂中正负样例共有 功效:%d 配伍:%d 条。" % (
        function, len(finalfunclist), len(finalpreslist)))
    data_process.write_in_csv(writecsvname1, finalfunclist)
    data_process.write_in_csv(writecsvname2, finalpreslist)
Exemplo n.º 5
0
def pickData(readcsvname1, readcsvname2, readcsvname3, readcsvname4, function,
             preswritename='combinePrescription.csv',
             funcwritename='combineFunction_QFCS.csv'):
    """Combine positives from one data set with negatives from another.

    * Positives: rows of ``readcsvname2`` with a cell containing ``function``,
      each paired with the row at the same position in ``readcsvname1``.
    * Negatives: rows of ``readcsvname4`` in which no cell contains
      ``function``, each paired with the row at the same position in
      ``readcsvname3``. Collection stops once the number of negatives reaches
      1.2 times the number of positives.

    Cells are decoded as UTF-8 before searching. Positives come first in both
    outputs, followed by the negatives.

    Args:
        readcsvname1: CSV paired with ``readcsvname2`` (prescriptions, judging
            by the variable names).
        readcsvname2: CSV searched for positives.
        readcsvname3: CSV paired with ``readcsvname4``.
        readcsvname4: CSV searched for negatives.
        function: Efficacy keyword.
        preswritename: Output CSV for the rows taken from ``readcsvname1`` and
            ``readcsvname3``.
        funcwritename: Output CSV for the rows taken from ``readcsvname2`` and
            ``readcsvname4``.

    Raises:
        IndexError: If a selected row has no counterpart at the same position
            in its paired file.
    """
    print('pickData')
    # NOTE(review): the literal below used to render as an empty string,
    # making the replace a no-op; its use on the first cell of the first row
    # suggests a UTF-8 byte-order mark strip -- confirm.
    bom = '\xef\xbb\xbf'

    presdatalist = list(data_process.read_csv(readcsvname1))
    funcdata = data_process.read_csv(readcsvname2)
    webPresdatalist = list(data_process.read_csv(readcsvname3))
    webFuncdata = data_process.read_csv(readcsvname4)

    preslist = []
    funclist = []
    for num, item in enumerate(funcdata):
        if num == 0 and item:
            item[0] = item[0].replace(bom, '')
        # any() stops at the first matching cell, so a row with several
        # matching cells is no longer appended more than once.
        if any(function in itemdata.decode('utf8') for itemdata in item):
            funclist.append(item)
            preslist.append(presdatalist[num])
    positiveCount = len(funclist)
    print('功效 %s 的个数为:%d' % (function, positiveCount))

    negativeCount = positiveCount * 1.2
    count = 0
    for num, item in enumerate(webFuncdata):
        if count >= negativeCount:
            # Quota reached: nothing further can be added.
            break
        if num == 0 and item:
            item[0] = item[0].replace(bom, '')
        # A negative must not mention the keyword in ANY cell. The previous
        # loop broke out unconditionally after the first cell, so a keyword
        # in a later cell still produced a "negative" sample.
        if item and not any(
                function in itemdata.decode('utf8') for itemdata in item):
            funclist.append(item)
            preslist.append(webPresdatalist[num])
            count += 1

    print('功效 %s 的测试样本集大小为:%d' % (function, len(funclist)))

    # Output names are parameters so that switching to another efficacy does
    # not require editing this function.
    data_process.write_in_csv(preswritename, preslist)
    data_process.write_in_csv(funcwritename, funclist)
Exemplo n.º 6
0
def processAll_tiaocan(allList, preName, xuexilv, d_a, zz):
    """Print every entry of ``allList`` as one comma-joined line and save them.

    Each entry is copied into a list, printed as its items joined by ``','``,
    and the copies are written to a CSV whose name is assembled from
    ``xuexilv``, ``d_a``, ``zz`` and ``preName``. Afterwards the number of
    occurrences of every distinct line is tallied.

    Args:
        allList: Iterable of string sequences.
        preName: String inserted into the output file name.
        xuexilv: Value inserted (via ``str``) into the output file name.
        d_a: Value inserted (via ``str``) into the output file name.
        zz: Value inserted (via ``str``) into the output file name.
    """
    print('规律总结处理...')
    lines = []
    rows = []
    for entry in allList:
        cells = list(entry)
        line = ','.join(cells)
        print(line)
        lines.append(line)
        rows.append(cells)

    target = ('../myMedicalModel/tiaocan_lr0.01/'
              + str(xuexilv) + '-' + str(d_a) + '-' + str(zz)
              + '-ECMA_' + preName + '_Apriori_16_avg_unit.csv')
    data_process.write_in_csv(target, rows)

    # [occurrences, line] for each distinct line.
    # NOTE(review): this tally is neither returned nor used in the code
    # visible here -- confirm whether a later step was meant to consume it.
    sortList = [[lines.count(line), line] for line in set(lines)]
def composition_process(readcsvname, writecsvname):
    """Tokenise the first cell of every row on punctuation and spaces.

    Rows are read with ``data_process.read_csv(readcsvname)``. The first cell
    of each row is decoded as UTF-8, list punctuation is replaced by a space,
    a space is inserted before every opening and after every closing
    parenthesis, and the text is split on ``' '``. Empty tokens are dropped.
    One token list per input row is written with
    ``data_process.write_in_csv(writecsvname, ...)``.

    Args:
        readcsvname: Path of the CSV file to read.
        writecsvname: Path of the CSV file to write.

    Raises:
        IndexError: If a row has no cells.
    """
    print('composition_process')
    # The patterns are spelled with \u escapes so they do not depend on the
    # encoding of this source file, and as u'' literals because ur'' is a
    # syntax error on Python 3.
    # NOTE(review): the earlier alternations listed the ASCII comma twice and
    # the two parenthesis patterns were unbalanced (re.compile rejects them),
    # which points to full-width characters lost in transcoding. Both widths
    # of comma, semicolon and parenthesis are matched here -- confirm.
    separators = re.compile(u'[\u3001\uff0c,\uff1b;\u3002]')
    opening = re.compile(u'[(\uff08]')
    closing = re.compile(u'[)\uff09]')

    datas = []
    for item in data_process.read_csv(readcsvname):
        text = item[0].decode('utf-8')
        text = separators.sub(' ', text)
        text = opening.sub(' (', text)
        text = closing.sub(') ', text)
        # Filter into a new list. Popping from the list while iterating over
        # it skipped the token after each removal and left blanks behind.
        datas.append([token for token in text.split(' ') if token != ''])
    data_process.write_in_csv(writecsvname, datas)
Exemplo n.º 8
0
def processAll(allList, preName, cishu, top):
    """Print every entry of ``allList`` as one comma-joined line and save them.

    Each entry is copied into a list, printed as its items joined by ``','``,
    and the copies are written to a CSV whose name is assembled from
    ``preName``, ``top`` and ``cishu``. Afterwards the number of occurrences
    of every distinct line is tallied.

    Args:
        allList: Iterable of string sequences.
        preName: String inserted into the output file name.
        cishu: Value inserted (via ``str``) into the output file name.
        top: String inserted into the output file name.
    """
    print('规律总结处理...')
    lines = []
    rows = []
    for entry in allList:
        cells = list(entry)
        line = ','.join(cells)
        print(line)
        lines.append(line)
        rows.append(cells)

    target = ('../myMedicalModel/load_result-627-uit80-H-top8/ECMA_'
              + preName + '_Apriori_0.001_128_0.0002-top' + top + '-'
              + str(cishu) + '.csv')
    data_process.write_in_csv(target, rows)

    # [occurrences, line] for each distinct line.
    # NOTE(review): this tally is neither returned nor used in the code
    # visible here -- confirm whether a later step was meant to consume it.
    sortList = [[lines.count(line), line] for line in set(lines)]
Exemplo n.º 9
0
                zz.append(medicallist[weightlist[i][0] - 1])
                allresults.append(medicallist[weightlist[i][0] - 1])
                zz.append(weightlist[i][1])
                importantMedical.append(zz)

            print '当功效为‘清热解毒’时,占主导作用的药物组合是:\n'
            count = 0
            # print importantMedical
            for item in importantMedical:
                # print item
                print '药物%d:' % (count + 1), item[0], item[1]
                count += 1
            print '一共有 %d 中药物' % len(importantMedical)

            # writecsvname='../resultsdata/result_'+readcsvname.split('/')[-1]
            # data_process.write_in_csv(writecsvname, importantMedical)

    medicaListSet = list(set(allresults))
    # 统计每种药物出现的次数
    numarray = []
    n = []
    for item in medicaListSet:
        n.append(item)
        n.append(allresults.count(item))
        numarray.append(n)
        n = []
    # 以次数排序
    numarray = sorted(numarray, key=lambda x: x[1], reverse=True)
    writecsvname = '../resultsdata/result.csv'
    data_process.write_in_csv(writecsvname, numarray)
Exemplo n.º 10
0
    # # 手动去除allMedicalCount_1.csv里频次为1的药物;和调和药“甘草”
    # writecsvname = '../formulaData_1/QRJD_medical_count.csv'
    # data_process.write_in_csv(writecsvname , numarray)

    #统计处方中不同频次的药物占比
    # readcsvname = 'allMedicalCount_combine_QFCS.csv'
    # tongji(readcsvname)

    #step 2 计算方剂向量特征
    #(1)使用one-hot表示,每个方剂的维数等于所有方剂中药物的去重个数,若出现则为1 *********presFeature_onehot.csv
    #数据1:全取自防风数据集
    csvname1='../formulaData_1/QRJD_pres.csv'
    csvname2 = '../formulaData_1/QRJD_medical_count.csv'
    pFeatrue= presFeature(csvname1,csvname2)
    writecsvname = '../formulaData_1/presFeature_onehot_QRJD_584t.csv'
    data_process.write_in_csv(writecsvname , pFeatrue)
    #
    # # (2)使用配伍单位数值表示,每个方剂的维数等于所有方剂中药物的去重个数*********presFeature_realValue.csv
    # csvname1 = 'prescription_6.csv'
    # csvname2 = 'allMedicalCount_1.csv'
    # pFeatrue = presFeature_1(csvname1, csvname2)
    # writecsvname = 'presFeature_realValue_combine_QFCS_223t.csv'
    # data_process.write_in_csv(writecsvname, pFeatrue)
    #
    # # (3)使用配伍单位数值表示,每个方剂的维数等于所有方剂中药物的去重个数,在方剂中做归一化处理*********presFeature_standardValue.csv
    # csvname1 = 'prescription_6.csv'
    # csvname2 = 'allMedicalCount_1.csv'
    # 数据2:与web爬取数据结合的正负例
    # csvname1='combinePrescription.csv'
    # csvname2 = 'allMedicalCount_combine_QFCS.csv'
Exemplo n.º 11
0
 result = sess.run([logits, model.B,model.Q], feed_dict={model.input_pl: batch_input, labels: batch_tags})
 for j in range(len(batch_tags)):
     if np.argmax(batch_tags[j]) == 0:
         if np.argmax(labelList[k_count]) == np.argmax(batch_tags[j]):
             evalCount += 1
     rs += np.sum(np.argmax(labelList[k_count]) == np.argmax(batch_tags[j]))
 # print('labelList[k_count]',labelList[k_count])
 if not np.argmax(labelList[k_count]):
     preClass = True
 else:
     preClass = False
 medicalList = []
 if FLAGS.visualize == True and preNum==finalNum:
   #保存每个方剂中的WP1参数—获取药物相互作用注意力 start
   if i < 4:
         data_process.write_in_csv('../myMedicalModel/atentionVision/%s_inter.csv' % i,
                                   Q_value[k_count])
   # 保存每个方剂中的WP1参数—获取药物相互作用注意力 end
   f.write('<div style="margin:15px;">\n')
   #result[1][0]保存的是方剂中每个药物对应的attention因子,具体result[1][0][k][j]取出
   for k in range(len(result[1][0])):
     f.write('\t<p> —— 测试方剂 %s (类标:%s ; 预测类标:%s):—— </p>\n'%(i, tags[i],preClass))
     f.write('<p style="margin:10px;font-family:SimHei">\n')
     ww = TOKENIZER_RE.findall(words[i*batch_size][0])
     for j in range(word_pad_length):
       if (attMetricAll[k_count][j]/finalNum) < a:
           color= 0
       else:
           color=attMetricAll[k_count][j]/finalNum
       alpha = "{:.2f}".format(color)
       if len(ww) <= j:
         w = "   "
Exemplo n.º 12
0
        if num != 0:
            zz = []
            item[0] = item[0].replace('[[', '')
            item[0] = item[0].replace(']]', '')
            item[0] = item[0].replace('', '')
            # print 'zz',item[0]
            zz.append(num)
            zz.append(float(item[0]))
            weightlist.append(zz)
        num += 1

    weightlist = sorted(weightlist, key=lambda x: x[1], reverse=True)

    for i in range(0, 11):
        zz = []
        zz.append(medicallist[weightlist[i][0] - 1])
        zz.append(weightlist[i][1])
        importantMedical.append(zz)

    print '当功效为‘祛风除湿’时,占主导作用的药物组合是:\n'
    count = 0
    # print importantMedical
    for item in importantMedical:
        # print item
        print '药物%d:' % (count + 1), item[0], item[1]
        count += 1
    print '一共有 %d 中药物' % len(importantMedical)

    writecsvname = 'result_' + readcsvname
    data_process.write_in_csv(writecsvname, importantMedical)
Exemplo n.º 13
0
    # for item in csvdata:
    #     num = 0
    #     for itemdata in item:
    #         if itemdata == '':
    #             item.pop(num)
    #         num += 1
    #     datas.append(item)
    # data_process.write_in_csv(writecsvname, datas)
    #删除没有内容的项  end


    # # (3)药名-数量单位 一一提取匹配;处理“各”的情况 补填;处理“等分”,“少许”等词清除
    readcsvname='../formulaData_1/pres_5W_3.csv'
    writecsvname='../formulaData_1/pres_5W_4.csv'
    finalmedicallist=extractnumfromstr(readcsvname, writecsvname)
    data_process.write_in_csv(writecsvname, finalmedicallist)

    #8-1 目前考虑用one-hot表示,先不做这步
    # #step 5 把单位kg,钱,两 统一为 g(克)
    # csvname='../formulaData_1/pres_5W_4.csv'
    # normalList= dataDetailProcess.unitTransformation(csvname)
    # writecsvname='../formulaData_1/pres_5W_5.csv'
    # data_process.write_in_csv(writecsvname, normalList)
    #
    # #step 6 清洗一些none单位的杂音 如‘等分,少许’
    # csvname='../formulaData_1/pres_5W_5.csv'
    # noneList= dataDetailProcess.noneStandard(csvname)
    # writecsvname='../formulaData_1/pres_5W_6.csv'
    # data_process.write_in_csv(writecsvname, noneList)

Exemplo n.º 14
0
# coding=utf-8
import data_process

readcsvname = '../formulaData_Experiment/ExResult_onehot_QRJD.csv'
readdata = data_process.read_csv(readcsvname)
datalist = []
data = []
for item in readdata:
    print 'item', item[0]
    num = 0
    for i in item[0].split(' '):
        print '1', i
        if (num == 3):
            print '2', i
            print i.split(':')[-1]
            maxiter = i.split(':')[-1]
            data.append(float(maxiter))

        num += 1
    acc = item[1].split('acc:')[-1]
    acc = acc.replace('"', '')
    print acc
    data.append(float(acc))
    datalist.append(data)
    data = []

writecsvname = '../formulaData_1/L1_draw.csv'
medicaldata = data_process.write_in_csv(writecsvname, datalist)
Exemplo n.º 15
0
                                        attMetric.append(thisMetic)
                                    f.write('</div>\n')

                            if FLAGS.visualize == True and preNum < finalNum:
                                f.write('\t<p>Test accuracy: %s</p>\n' %
                                        (rs / total))
                                f.write(
                                    '\t<p>该功效下%s个经典方剂(即测试集前%s个方剂) accuracy :%s</p>\n'
                                    % (evalNum, evalNum, evalCount / evalNum))
                                f.write(
                                    '\t<p>该功效下%s个经典方剂 avg-dice : %s</p>\n' %
                                    (evalNum, sum(allDice) / evalNum))
                                f.write('</body></html>')
                                f.close()
                                data_process.write_in_csv(
                                    '../myMedicalModel/modelvsECMSR_Apriori/metircsResults/attMetric'
                                    + preName + str(preNum) + ".csv",
                                    attMetric)
                                if attMetricAll == []:
                                    attMetricAll = attMetric
                                else:
                                    for x, itemx in enumerate(attMetricAll):
                                        for y, itemy in enumerate(itemx):
                                            attMetricAll[x][y] = attMetricAll[
                                                x][y] + attMetric[x][y]
                        ##########################################################最终结果显示
                        if FLAGS.visualize == True and preNum == finalNum:
                            f = open(
                                '../myMedicalModel/modelvsECMSR_Apriori/html/final_%s_visualizeTCM_%s_noLSTM_HWH_epoches%s_r1_num%s.html'
                                % (preName, preName, FLAGS.num_epochs, preNum),
                                'w')
                            f.write(
Exemplo n.º 16
0
        count = 0
        # print item
        zz = []
        for itemdata in item:
            # print count
            # print itemdata
            if (count % 2) == 0:
                zz.append(itemdata)
                count += 1
            else:
                count += 1
        # print zz
        finallist.append(zz)

    return finallist


if __name__ == '__main__':
    print('准备Apriori算法数据....')
    # Efficacy being prepared; change it here to switch to another one.
    function = '祛风除湿'
    # Disabled earlier step, kept for reference:
    #   pickData('prescription_6.csv', 'function_1.csv', function)
    # NOTE(review): presumably that step produces the file read below --
    # confirm before re-running from scratch.

    readcsvname, writename = ('Apriori_QFCS_Prescription.csv',
                              'Apriori_QFCS_data.csv')
    finallist = onlyWord(readcsvname)
    data_process.write_in_csv(writename, finallist)