示例#1
0
文件: symptom.py 项目: hetaov/spider
def main():
    """Export the symptom spreadsheet to ``data/ontology/symptom.ttl``.

    Loads ``data/raw/symblos.xlsx`` through ``pds.read_excel``, resolves each
    row's identifier from the mapping returned by ``get_symptom()`` (keyed by
    column 3 of the row) and writes the header produced by ``all()`` followed
    by every collected statement, joined with blank lines.
    """
    df = pds.read_excel('data/raw/symblos.xlsx', encoding="utf-8")

    symptom_dict, words = get_symptom()

    # The context manager guarantees the output file is closed (and flushed)
    # even when one of the row builders below raises.
    with open('data/ontology/symptom.ttl', 'w') as fl:
        print(len(df))
        # NOTE(review): `all` is called without arguments, so it must be a
        # project helper shadowing the builtin -- presumably it returns the
        # TTL prefix header; confirm.
        fl.write(all())
        l = len(df)
        line_s = list()

        for index, symptom in df.iterrows():
            symptom_uri = symptom_dict.get(symptom[3])

            # Column 9 only yields a comment when it holds text; any other
            # cell type is skipped.
            if isinstance(symptom[9], unicode):
                line_s.append(
                    'symptom:Q%s rdfs:comment "%s"@cn .' %
                    (symptom_uri, remove_all_html(symptom[9]).encode('utf-8')))

            line_s.append('symptom:Q%s rdfs:label "%s"@cn .' %
                          (symptom_uri, symptom[3].encode('utf-8')))
            line_s.append(build_disease(symptom, symptom_uri))
            line_s.append(build_department(symptom, symptom_uri))
            line_s.append(build_symptom(symptom, symptom_uri))
            line_s.append(build_part(symptom, symptom_uri))
            # NOTE(review): the result of build_food() is discarded, unlike
            # the other build_* calls above -- confirm whether it should be
            # appended to line_s as well.
            build_food(symptom, symptom_uri)

            print('finished: %d: %d' % (index, l))

        fl.write('\n\n'.join(line_s))
示例#2
0
def main():
    """Export the disease spreadsheet to ``data/ontology/disease.ttl``.

    Loads ``data/raw/disease.xlsx`` through ``pds.read_excel`` and, for every
    row, writes the comment (when column 5 holds text), the label and the
    strings returned by the ``build_*`` helpers. Each piece is followed by a
    blank line.
    """
    df = pds.read_excel('data/raw/disease.xlsx', encoding='utf-8')

    disease_dict, words = get_disease()

    # The context manager closes the output file even if a row fails midway.
    with open('data/ontology/disease.ttl', 'w') as fl:

        def write_paragraph(text):
            # Every emitted piece is terminated by one blank line.
            fl.write(text)
            fl.write('\n\n')

        # NOTE(review): `all` is called without arguments, so it must be a
        # project helper shadowing the builtin -- presumably it returns the
        # TTL prefix header; confirm.
        write_paragraph(all())

        l = len(df)
        cols = df.columns

        for index, row in df.iterrows():
            disease_uri = disease_dict.get(row[3])

            if isinstance(row[5], unicode):
                write_paragraph(
                    'disease:Q%s rdfs:comment "%s"@cn .' %
                    (disease_uri, remove_all_html(row[5]).encode('utf-8')))
            write_paragraph('disease:Q%s rdfs:label "%s"@cn .' %
                            (disease_uri, row[3].encode('utf-8')))

            write_paragraph(build_department(row, disease_uri))
            # NOTE(review): build_symptom, build_medicine and build_other
            # receive the row index while the other builders receive
            # disease_uri -- confirm this difference is intended.
            write_paragraph(build_symptom(row, index))
            write_paragraph(build_check(row, disease_uri))
            write_paragraph(build_disease(row, disease_uri))
            write_paragraph(build_medicine(row, index))

            # build_other() is the only builder whose result is encoded here.
            write_paragraph(build_other(row, index, cols).encode('utf-8'))
            print('finised:%d of %d' % (index, l))
示例#3
0
文件: check.py 项目: hetaov/spider
def main():
    """Write one ``rdfs:label`` statement per check word to ``check.ttl``.

    The words come from ``get_check()``; each statement is numbered with the
    word's position in that sequence. The header produced by ``all()`` is
    written first, then the statements separated by blank lines and encoded
    as UTF-8.
    """
    word_dict, words = get_check()

    # The original never closed this file, so buffered output could be lost;
    # the context manager closes it on every exit path.
    with open('data/ontology/check.ttl', 'w') as fl:
        # NOTE(review): `all` is called without arguments, so it must be a
        # project helper shadowing the builtin -- presumably it returns the
        # TTL prefix header; confirm.
        fl.write(all())
        fl.write('\n\n')

        line = ['check:Q%d rdfs:label "%s"@cn .' % (index, row)
                for index, row in enumerate(words)]

        fl.write('\n\n'.join(line).encode('utf-8'))
示例#4
0
文件: property.py 项目: hetaov/spider
def main():
    """Write one ``rdfs:label`` statement per property to ``pro.ttl``.

    Reads ``word/properties/medicine.csv`` through ``pds.read_csv`` and emits
    a ``prom:P<index>`` statement for the ``name`` column of every row, after
    the header produced by ``all()``.
    """
    df = pds.read_csv('word/properties/medicine.csv')

    # The context manager closes the file even when a row fails to format.
    with open('data/ontology/pro.ttl', 'w') as pro:
        # NOTE(review): `all` is called without arguments, so it must be a
        # project helper shadowing the builtin -- presumably it returns the
        # TTL prefix header; confirm.
        pro.write(all())

        pro.write('\n\n\n')

        for index, row in df.iterrows():
            pro.write('prom:P%d rdfs:label "%s"@cn .\n' % (index, row['name']))
示例#5
0
def main():
    """Write the two department label files.

    ``get_department()`` returns two sequences; each entry becomes one
    ``rdfs:label`` statement numbered by its position, written to
    ``department_1.ttl`` and ``department_2.ttl`` respectively after the
    header produced by ``all()``.
    """
    department_1, department_2 = get_department()

    # Both handles are released together, including when an error occurs
    # after only one of the files has been written.
    with open('data/ontology/department_1.ttl', 'w') as fl_1, \
            open('data/ontology/department_2.ttl', 'w') as fl_2:
        # NOTE(review): `all` is called without arguments, so it must be a
        # project helper shadowing the builtin -- presumably it returns the
        # TTL prefix header; confirm.
        fl_1.write(all())
        fl_2.write(all())

        fl_1.write('\n\n')
        fl_2.write('\n\n')

        line_1_s = ['department_1:Q%d rdfs:label "%s"@cn .' % (index, row)
                    for index, row in enumerate(department_1)]
        line_2_s = ['department_2:Q%d rdfs:label "%s"@cn .' % (index, row)
                    for index, row in enumerate(department_2)]

        fl_1.write('\n'.join(line_1_s).encode('utf-8'))
        fl_2.write('\n'.join(line_2_s).encode('utf-8'))
示例#6
0
def main():
    """Write ``rdfs:comment`` statements for known components.

    Reads ``data/word/components_word_new.csv`` through ``pds.read_csv``. For
    each row whose first column has a truthy entry in the mapping returned by
    ``get_component()``, a statement is emitted whose text is the second
    column passed through ``remove_all_html``.
    """
    word_dict, words = get_component()

    # The original never closed this file, so buffered output could be lost;
    # the context manager closes it on every exit path.
    with open('data/ontology/component_desc.ttl', 'w') as fl:
        df = pds.read_csv('data/word/components_word_new.csv', encoding='utf-8')

        line = list()
        # NOTE(review): `all` is called without arguments, so it must be a
        # project helper shadowing the builtin -- presumably it returns the
        # TTL prefix header; confirm.
        fl.write(all())
        fl.write('\n\n')

        for index, row in df.iterrows():
            component_id = word_dict.get(row[0])
            # NOTE(review): the truthiness test skips unknown components, but
            # it would also skip a component whose id is 0 -- confirm ids
            # start at 1.
            if component_id:
                line.append('ele:Q%d rdfs:comment "%s"@cn .' %
                            (component_id, remove_all_html(row[1])))

        fl.write('\n\n'.join(line).encode('utf-8'))
示例#7
0
def build_product():
    """Write one ``rdfs:label`` statement per product to ``medicine_product.ttl``.

    The product names come from ``get_medicine_product()``; each statement is
    numbered with the name's position in that sequence. Progress is printed
    for every entry.
    """
    medicine_dict, words = get_medicine_product()

    # The context manager closes the file even if building a line fails.
    with open('data/ontology/medicine_product.ttl', 'w') as fl:
        line_s = list()

        for index, row in enumerate(words):
            print(row)
            print(index)
            print('------>')
            line_s.append('drug:Q%d rdfs:label "%s"@cn .' % (index, row))

        # NOTE(review): `all` is called without arguments, so it must be a
        # project helper shadowing the builtin. No separator is written
        # between its output and the first statement -- confirm the header
        # ends with a newline.
        fl.write(all())
        fl.write('\n\n'.join(line_s).encode('utf-8'))
示例#8
0
def main():
    """Export the medicine spreadsheet to ``data/triple/medicine.ttl``.

    Property ids (``P<n>``) are assigned from the position of each entry in
    the ``name`` column of ``word/properties/medicine.csv``. Every row of
    ``data/raw/medicine.xlsx`` is resolved to a numeric id through
    ``get_by_keyword`` and each of its columns is turned into zero or more
    statements, with dedicated handling for a handful of columns and a
    generic literal statement for the rest.
    """
    # NOTE(review): the result is never used below; the call is kept in case
    # get_medicine() has side effects -- confirm and remove if not.
    medicine_dict = get_medicine()

    df = pds.read_csv('word/properties/medicine.csv', encoding="utf-8")

    m_words = get_words()

    m_dict = element(m_words)

    medicine_x = pds.read_excel('data/raw/medicine.xlsx', encoding="utf-8")

    # The context manager closes the output file even when a row raises
    # (for example when a lookup below returns None).
    with open('data/triple/medicine.ttl', 'w') as medicine_o:

        ls = df['name']

        # Column name -> property id, numbered by position in the CSV.
        dict_name = {}
        for index, x in enumerate(ls):
            dict_name[x] = 'P%d' % index

        # NOTE(review): `all` is called without arguments, so it must be a
        # project helper shadowing the builtin -- presumably it returns the
        # TTL prefix header; confirm.
        medicine_o.write(all())

        medicine_o.write('\n\n\n')

        man_dict = manufacturer()

        dosage_dict = dosage_form()

        formula_dict = get_formula()

        generic_dict = get_generic_medicine()

        line_s = list()

        l = len(medicine_x)

        for index, medicine in medicine_x.iterrows():

            print(medicine[2])
            medicine_uri = get_by_keyword(medicine[2])
            print(medicine_uri)
            print('===============')
            # NOTE(review): raises AttributeError if get_by_keyword() finds
            # nothing and returns None -- confirm it always returns a match.
            medicine_uri = int(medicine_uri.id)

            for col in medicine_x.columns:

                pid = dict_name.get(col)
                prop = medicine.get(col)

                # "Covered by medical insurance" flag: fixed property/object.
                if col == u'是否医保' and prop == 1:
                    line_s.append('drug:Q%d prom:%s %s . \n' % (medicine_uri, 'P30', 'medicine:Q1'))
                    continue

                # Molecular weight: numeric literal, NaN cells are skipped.
                # The exact-type check is kept as in the original so the set
                # of accepted values does not change.
                if pid and col == u'分子量' and prop and type(prop) is float and not math.isnan(prop):
                    line_s.append('drug:Q%d prom:%s %s . \n' % (medicine_uri, 'P18', prop))
                    continue

                # Everything else is only exported for text cells of columns
                # that have a property id.
                if pid and prop and type(prop) is unicode:
                    if col == u'生产企业':
                        # Manufacturer.
                        line_s.append('drug:Q%d prom:%s org:%s . \n' % (medicine_uri, pid, man_dict.get(prop)))
                    elif col == u'主要成份':
                        # Main ingredients: one statement per extracted word.
                        ele_str = remove_html(prop)
                        els = extrat(m_words, ele_str)
                        for el in els:
                            el_pro = m_dict.get(el)
                            line_s.append('drug:Q%d prom:%s ele:Q%s . \n' % (medicine_uri, pid, el_pro))
                    elif col == u'适应症':
                        # Indications.
                        line_s.append(indications(prop, medicine_uri, 'P8'))
                        line_s.append('\n')
                        line_s.append(indications_sym(prop, medicine_uri, 'P8'))
                        line_s.append('\n')
                    elif col == u'剂型':
                        # Dosage form.
                        # NOTE(review): '%d' raises TypeError when the lookup
                        # returns None -- confirm every form is in the dict.
                        form_str = remove_special_character(prop)
                        line_s.append('drug:Q%d prom:P31 dosage_form:Q%d .' % (medicine_uri, dosage_dict.get(form_str)))
                        line_s.append('\n')
                    elif col == u'药品名称':
                        # Drug name.
                        # NOTE(review): same '%d'-with-None hazard as above.
                        line_s.append('drug:Q%d prom:P1 drug_generic:Q%d .' % (medicine_uri, generic_dict.get(prop)))
                        line_s.append('\n')
                    elif col == u'禁忌':
                        # Contraindications.
                        line_s.append(indications(prop, medicine_uri, 'P11'))
                        line_s.append('\n')
                        line_s.append(indications_sym(prop, medicine_uri, 'P11'))
                        line_s.append('\n')
                    elif col == u'分子式':
                        # Molecular formula: only emitted for known formulas.
                        formula_str = format_formula(prop)
                        if formula_str and formula_dict.get(formula_str):
                            line_s.append('drug:Q%d prom:%s formula:Q%s . \n' % (medicine_uri, 'P17', formula_dict.get(formula_str)))
                            line_s.append('\n')
                    elif col == u'不良反应':
                        # Adverse reactions.
                        line_s.append(indications_sym(prop, medicine_uri, 'P10'))
                        line_s.append('\n')
                    else:
                        # Any other mapped column becomes a plain literal.
                        prop = remove_html(prop)
                        line_s.append('drug:Q%d prom:%s "%s"@cn . \n' % (medicine_uri, pid, remove_all_html(prop).encode('utf-8')))
                        line_s.append('\n')

            line_s.append('\n\n\n')
            print('<---------:%d of %d' % (index, l))

        # NOTE(review): line_s holds UTF-8 encoded byte strings next to
        # whatever indications()/indications_sym() return; the join assumes
        # those are byte strings too -- confirm.
        medicine_o.write(''.join(line_s))