#在buildroot建立子节点testquestion testquestion = ElementTree.SubElement(buildroot, "testquestion") #设置testquestion的各个属性 testquestion_id = ElementTree.SubElement(testquestion, "id") testquestion_id.text = str(i) ## print testquestion_id.text fo.write( testquestion_id.text); testquestion_q = ElementTree.SubElement(testquestion, "q") testquestion_q.text = str(question.getchildren ()[0].text.decode('utf8')) ## print testquestion_q.text fo.write( testquestion_q.text); testquestion_category = ElementTree.SubElement(testquestion, "category") ## testquestion_category.text = str(testtype) testquestion_category.text = clf.get_type(question.getchildren ()[0].text) ## print testquestion_category.text fo.write( testquestion_category.text); testquestion_query = ElementTree.SubElement(testquestion, "query") testquestion_query.text = str(testkeys.decode('utf8')) ## print testquestion_query.text fo.write( testquestion_query.text); ## testquestion_query.text = str(testkeys.encode('utf8')) ## print testquestion_query.text i=i+1 #将输出文档写入uu.xml buildtree = ElementTree.ElementTree(buildroot)
# Build a <QuestionSet> document in which every input question carries its
# original text, the category returned by the classifier and up to five
# keywords extracted by jieba, then pretty-print the result.
# `root`, `ElementTree`, `jieba`, `QClassifierImpl` and `xml.dom.minidom` are
# expected to be provided by the surrounding script.
buildroot = ElementTree.Element("QuestionSet")

# Train the classifier once, before any question is labelled.
clf = QClassifierImpl(train_data_path='../data/pair.xml')
clf.train()

# Iterate and index the elements directly: Element.getchildren() is
# deprecated (and removed in Python 3.9), while iteration and indexing yield
# the same children in the same order.
for question in root:
    # The question text lives in the first child of each input element.
    q_text = question[0].text
    testtags = jieba.analyse.extract_tags(q_text, topK=5)
    testkeys = ' '.join(testtags)

    testquestion = ElementTree.SubElement(buildroot, 'question')
    testquestion.set('id', question.get('id'))

    # question text
    q = ElementTree.SubElement(testquestion, 'q')
    q.text = q_text

    # question category
    category = clf.get_type(q_text)
    cate_tree = ElementTree.SubElement(testquestion, 'category')
    cate_tree.text = category

    # query words: the extracted keywords joined by single spaces
    query = ElementTree.SubElement(testquestion, 'query')
    query.text = testkeys

xml_string = ElementTree.tostring(buildroot, encoding='utf-8')
# Keep the parsed document under its own name: binding it to `xml` would
# replace the `xml` package in this namespace and break any later use of
# xml.dom / xml.etree.
pretty_doc = xml.dom.minidom.parseString(xml_string)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(pretty_doc.toprettyxml())
# Build a <QuestionSet> document in which every input question carries its
# original text, the category returned by the classifier and up to five
# keywords extracted by jieba, then pretty-print the result.
# `root`, `ElementTree`, `jieba`, `QClassifierImpl` and `xml.dom.minidom` are
# expected to be provided by the surrounding script.
buildroot = ElementTree.Element("QuestionSet")

# Train the classifier once, before any question is labelled.
clf = QClassifierImpl(train_data_path='../data/pair.xml')
clf.train()

# Iterate and index the elements directly: Element.getchildren() is
# deprecated (and removed in Python 3.9), while iteration and indexing yield
# the same children in the same order.
for question in root:
    # The question text lives in the first child of each input element.
    q_text = question[0].text
    testtags = jieba.analyse.extract_tags(q_text, topK=5)
    testkeys = ' '.join(testtags)

    testquestion = ElementTree.SubElement(buildroot, 'question')
    testquestion.set('id', question.get('id'))

    # question text
    q = ElementTree.SubElement(testquestion, 'q')
    q.text = q_text

    # question category
    category = clf.get_type(q_text)
    cate_tree = ElementTree.SubElement(testquestion, 'category')
    cate_tree.text = category

    # query words: the extracted keywords joined by single spaces
    query = ElementTree.SubElement(testquestion, 'query')
    query.text = testkeys

xml_string = ElementTree.tostring(buildroot, encoding='utf-8')
# Keep the parsed document under its own name: binding it to `xml` would
# replace the `xml` package in this namespace and break any later use of
# xml.dom / xml.etree.
pretty_doc = xml.dom.minidom.parseString(xml_string)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(pretty_doc.toprettyxml())
# -*- coding: utf-8 -*-
# Quick manual check of the question classifier: train it, then print what
# get_type returns for a handful of sample questions.
from QClassifier import QClassifierImpl

# The classifier is built from the pair.xml data file and trained before use.
clf = QClassifierImpl(train_data_path='../data/pair.xml')
clf.train()

# Samples are classified and printed in the order listed, one result per line.
sample_questions = (
    u'西班牙的首都是哪座城市?',
    u'飞轮海中的吴尊现在多大了?',
    u'小美指的是哪位明星?',
    u'冯小刚的现任妻子是谁?',
    u'哪场战役代表美国正式卷入第二次世界大战?',
    u'《三国演义》中“官渡之战”中交战双方的指挥官分别是?',
    u'唱感动天感动地的人是那个歌手?',
)
for sample in sample_questions:
    print(clf.get_type(sample))
#在buildroot建立子节点testquestion testquestion = ElementTree.SubElement(buildroot, "testquestion") #设置testquestion的各个属性 testquestion_id = ElementTree.SubElement(testquestion, "id") testquestion_id.text = str(i) ## print testquestion_id.text fo.write(testquestion_id.text) testquestion_q = ElementTree.SubElement(testquestion, "q") testquestion_q.text = str(question.getchildren()[0].text.decode('utf8')) ## print testquestion_q.text fo.write(testquestion_q.text) testquestion_category = ElementTree.SubElement(testquestion, "category") ## testquestion_category.text = str(testtype) testquestion_category.text = clf.get_type(question.getchildren()[0].text) ## print testquestion_category.text fo.write(testquestion_category.text) testquestion_query = ElementTree.SubElement(testquestion, "query") testquestion_query.text = str(testkeys.decode('utf8')) ## print testquestion_query.text fo.write(testquestion_query.text) ## testquestion_query.text = str(testkeys.encode('utf8')) ## print testquestion_query.text i = i + 1 #将输出文档写入uu.xml buildtree = ElementTree.ElementTree(buildroot) buildtree.write("testsetstep2.xml")
# -*- coding: utf-8 -*-
# Quick manual check of the question classifier: train it, then print what
# get_type returns for a handful of sample questions.
from QClassifier import QClassifierImpl

# The classifier is built from the pair.xml data file and trained before use.
clf = QClassifierImpl(train_data_path='../data/pair.xml')
clf.train()

# Samples are classified and printed in the order listed, one result per line.
sample_questions = (
    u'西班牙的首都是哪座城市?',
    u'飞轮海中的吴尊现在多大了?',
    u'小美指的是哪位明星?',
    u'冯小刚的现任妻子是谁?',
    u'哪场战役代表美国正式卷入第二次世界大战?',
    u'《三国演义》中“官渡之战”中交战双方的指挥官分别是?',
    u'唱感动天感动地的人是那个歌手?',
)
for sample in sample_questions:
    print(clf.get_type(sample))