if q_type not in type2questions: type2questions[q_type] = [(words, w, q_type)] else: type2questions[q_type].append((words, w, q_type)) break if not flag: # print(i, words) no_q_words.append((words, 'other')) type2questions['other'].append((words, 'other')) # print(len(no_q_words)) for k, v in type2questions.items(): print(k, len(v)) for i in v[:10]: print(i) with open('../data/question_type.txt', 'w') as f_out: for k, v in type2questions.items(): f_out.write(k + '\n') for i in v: tmp = ' '.join(i[0]) f_out.write(tmp + '\t' + '\t'.join(i[1:]) + '\n') if __name__ == '__main__': # test('2006年7月27日,360安全卫士正式推出。') # get_all_questions() my_parser = Parser() # analysis_questions(my_parser) # my_parser.get_question_type('缓刑适用于几年以下的有期徒刑') my_parser.read_train_set('../data/BoP2017-DBQA.dev.txt') my_parser.analysis_question(0)
print(k, len(v)) for i in v[:10]: print(i) with open('../data/question_type.txt', 'w') as f_out: for k, v in type2questions.items(): f_out.write(k + '\n') for i in v: tmp = ' '.join(i[0]) f_out.write(tmp + '\t' + '\t'.join(i[1:]) + '\n') if __name__ == '__main__': # test('2006年7月27日,360安全卫士正式推出。') # get_all_questions() my_parser = Parser() # analysis_questions(my_parser) # my_parser.get_question_type('缓刑适用于几年以下的有期徒刑') my_parser.read_train_set('../data/BoP2017-DBQA.train.txt') count = 0 for i in range(len(my_parser.articles)): res = my_parser.analysis_question(i, debug=False) # for i in range(10): # res = my_parser.analysis_question(i, debug=True) if res == 0: count += 0 else: count += 1.0 / res print('score', count / len(my_parser.articles)) # my_parser.analysis_question(0)