def get_piece_parsed(nodel, tag_model, pcfg_model):
    """Expand a single piece with a beam of scored hypotheses.

    The beam starts as ``[(nodel, 0)]``; each entry is a
    ``(node_list, score)`` pair.  On every round the unfinished entries
    reported by ``get_undone`` are turned into features, tagged through the
    piece-parse CRF model, expanded with ``change_nodel``, re-tagged in one
    batch and re-scored with ``count_score`` before being put back on the
    beam.  The loop stops once ``get_undone`` reports nothing unfinished.

    Args:
        nodel: the initial node list of the piece.
        tag_model: not referenced in this function body.
        pcfg_model: passed through to ``count_score``.

    Returns:
        A list of ``(node, score)`` pairs, where ``node`` is the first
        element of each surviving node list.

    Note (translated from the original TODO, marked done): every clause
    keeps its k best results and these are joined afterwards; keeping only
    the single best result per clause is not enough.
    """
    # Each beam entry is (nodes, accumulated score).
    beam = [(nodel, 0)]

    while True:
        # presumably splits finished from unfinished entries and prunes to
        # beam_size -- confirm against get_undone.
        beam, pending = get_undone(beam, beam_size)
        if len(pending) == 0:
            # Everything on the beam is finished.
            break

        # One feature sequence per pending entry, each followed by an empty
        # string, written to the temporary feature file for the CRF model.
        feature_lines = []
        for entry in pending:
            feature_lines.extend(get_nodel_feature(entry[0]))
            feature_lines.append('')
        write_file(piece_parse_tmp, feature_lines)

        crf_tags = piece_parse_model_test(piece_parse_tmp)
        if len(crf_tags) != len(pending):
            # Only a warning: processing continues with what was returned.
            # (Single-argument parenthesised print behaves the same on
            # Python 2 and Python 3.)
            print('number not match?piece_parse.py#get_piece_parsed')

        # Collect every expansion first so that all new-tag features can be
        # sent to the tagger in a single packed call (faster than per-item).
        # change_nodel returns [(nodel, diff_node_index, new_tag_features)].
        expansions = []
        tag_features = []
        for position, crf_tag in enumerate(crf_tags):
            candidates = change_nodel(pending[position][0], crf_tag)
            expansions.append(candidates)
            tag_features.extend([candidate[2] for candidate in candidates])
        new_tags = get_new_tag_pack_main(tag_features)

        # Hand the tags back in the same order the features were packed,
        # and add the score of the changed node to the parent's score.
        cursor = 0
        grown = []
        for position, candidates in enumerate(expansions):
            for candidate in candidates:
                nodes, diff_index = candidate[0], candidate[1]
                nodes[diff_index].tag = new_tags[cursor]
                cursor += 1
                score = pending[position][1] + count_score(nodes[diff_index],
                                                           pcfg_model)
                grown.append((nodes, score))
        beam.extend(grown)

    # Keep only the first node of every node list, together with its score.
    return [(entry[0][0], entry[1]) for entry in beam]
def get_piece_joint(kbest, tag_model, pcfg_model):
    """Join pieces by repeatedly expanding a beam of scored hypotheses.

    ``kbest`` is used directly as the initial beam.  Going by the original
    docstring it is a k-best list of piece lists (like ``[p1, p2, ...]``);
    the code below treats each entry as a ``(node_list, score)`` pair.  On
    every round the unfinished entries reported by ``get_undone`` are turned
    into features, tagged through the piece-joint CRF model, expanded with
    ``change_nodel``, re-tagged in one batch and re-scored with
    ``count_score`` before being put back on the beam.  The loop stops once
    ``get_undone`` reports nothing unfinished.

    Args:
        kbest: initial list of ``(node_list, score)`` entries.
        tag_model: not referenced in this function body.
        pcfg_model: passed through to ``count_score``.

    Returns:
        A list of ``(node, score)`` pairs, where ``node`` is the first
        element of each surviving node list (the original comment notes
        that only one node is left at the end).
    """
    while True:
        # presumably splits finished from unfinished entries and prunes to
        # beam_size -- confirm against get_undone.
        kbest, pending = get_undone(kbest, beam_size)
        if len(pending) == 0:
            # Everything on the beam is finished.
            break

        # One feature sequence per pending entry, each followed by an empty
        # string, written to the temporary feature file for the CRF model.
        feature_lines = []
        for entry in pending:
            feature_lines.extend(get_piece_joint_feature(entry[0]))
            feature_lines.append('')
        write_file(piece_joint_tmp, feature_lines)

        crf_tags = piece_joint_model_test(piece_joint_tmp)
        if len(crf_tags) != len(pending):
            # Only a warning: processing continues with what was returned.
            # The message previously named get_piece_parsed (copy-paste),
            # which pointed at the wrong function when debugging.
            # (Single-argument parenthesised print behaves the same on
            # Python 2 and Python 3.)
            print('number not match?piece_parse.py#get_piece_joint')

        # Collect every expansion first so that all new-tag features can be
        # sent to the tagger in a single packed call (faster than per-item).
        # change_nodel returns [(nodel, diff_node_index, new_tag_features)].
        expansions = []
        tag_features = []
        for position, crf_tag in enumerate(crf_tags):
            candidates = change_nodel(pending[position][0], crf_tag)
            expansions.append(candidates)
            tag_features.extend([candidate[2] for candidate in candidates])
        new_tags = get_new_tag_pack_main(tag_features)

        # Hand the tags back in the same order the features were packed,
        # and add the score of the changed node to the parent's score.
        cursor = 0
        grown = []
        for position, candidates in enumerate(expansions):
            for candidate in candidates:
                nodes, diff_index = candidate[0], candidate[1]
                nodes[diff_index].tag = new_tags[cursor]
                cursor += 1
                score = pending[position][1] + count_score(nodes[diff_index],
                                                           pcfg_model)
                grown.append((nodes, score))
        kbest.extend(grown)

    # Keep only the first node of every node list, together with its score.
    return [(entry[0][0], entry[1]) for entry in kbest]