def get_piece_joint_pack(nodel_l, tmp_path='tmp'):
    '''
    Join the pieces of several node lists in batched rounds.

    In every round, each entry of nodel_l that still holds more than one
    element has its features collected into one shared feature file; the
    joint model is run once on that file and every pending entry is replaced
    by change_nodel(entry, tags).  Rounds repeat until no entry has more
    than one element.
    NOTE(review): termination relies on change_nodel eventually shrinking
    each entry to at most one element -- confirm against change_nodel.

    nodel_l:  list whose entries are either '' or an indexable sequence.
              It is updated IN PLACE (pending entries are overwritten).
    tmp_path: path of the scratch feature file passed to write_file and
              piece_joint_model_test_pack.  Defaults to 'tmp'.

    Returns a list parallel to nodel_l: '' for entries equal to '',
    otherwise the first element of the entry.
    '''
    while True:
        # Positions of the entries that still need joining.
        pending = [pos for pos, nodel in enumerate(nodel_l) if len(nodel) > 1]
        if not pending:
            break

        features = []
        for pos in pending:
            features.extend(get_piece_joint_feature(nodel_l[pos]))
            # An empty string separates consecutive entries
            # (presumably written out as a blank line -- see write_file).
            features.append('')

        write_file(tmp_path, features)
        tags_all = piece_joint_model_test_pack(tmp_path)

        # The k-th tag sequence belongs to the k-th pending entry.  Indexing
        # `pending` (rather than zip) keeps the IndexError if the model
        # returns more sequences than were submitted.
        for k, tags in enumerate(tags_all):
            target = pending[k]
            nodel_l[target] = change_nodel(nodel_l[target], tags)

    return ['' if nodel == '' else nodel[0] for nodel in nodel_l]
def get_piece_joint(kbest, tag_model, pcfg_model):
    '''
    Repeatedly join pieces of the candidates in kbest until get_undone
    reports nothing left to process, then return tree-node results.

    kbest:      list of candidates, each indexed as (nodel, score).
    tag_model:  not used inside this function.
    pcfg_model: forwarded to count_score when scoring a changed node.

    Returns a list of (nodel[0], score) pairs, one per candidate left in
    kbest -- i.e. only the first node of each finished candidate is kept.
    '''
    while True:
        kbest, pending = get_undone(kbest, beam_size)
        if len(pending) == 0:
            # Everything is finished; fall through to build the result.
            break

        # Collect features for every pending candidate, each followed by ''.
        joint_features = []
        for candidate in pending:
            joint_features.extend(get_piece_joint_feature(candidate[0]))
            joint_features.append('')

        # Write the feature file and run the joint model on it.
        write_file(piece_joint_tmp, joint_features)
        crf_tags = piece_joint_model_test(piece_joint_tmp)
        if len(pending) != len(crf_tags):
            # Parenthesised single string: prints the same text under the
            # Python 2 print statement.
            print('number not match?piece_parse.py#get_piece_parsed')

        # Apply the tags to every candidate first, gathering the new-tag
        # features of all results so the tagger runs once for the batch.
        # change_nodel yields items shaped
        # (nodel, diff_node_index, new_tag_features).
        expansions = []
        tag_features = []
        for idx, tags in enumerate(crf_tags):
            expanded = change_nodel(pending[idx][0], tags)
            expansions.append(expanded)
            for item in expanded:
                tag_features.append(item[2])
        new_tags = get_new_tag_pack_main(tag_features)

        # Hand the predicted tags back in the same order they were gathered
        # and add the score of each changed node to its parent's score.
        rescored = []
        next_tag = 0
        for idx, expanded in enumerate(expansions):
            for item in expanded:
                nodes, changed_at = item[0], item[1]
                nodes[changed_at].tag = new_tags[next_tag]
                next_tag += 1
                score = pending[idx][1] + count_score(nodes[changed_at], pcfg_model)
                rescored.append((nodes, score))
        kbest.extend(rescored)

    # In the end only one node is left per candidate; keep just that node.
    return [(candidate[0][0], candidate[1]) for candidate in kbest]