def Identify_pre_process_data(l2, ln_o, onto_basis, purpose, id, t_h, ret_ps):
    # Module-level wrapper around Identify.pre_process_data so it can be handed
    # to thread.start_new_thread (see the commented-out call in process_page below).
    Identify.pre_process_data(l2, ln_o, onto_basis, purpose, id, t_h, ret_ps)
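# Hedged sketch (not in the original code): one way the wrapper above could fan
# pre-processing out over the lines of a page with the Python 2 'thread' module,
# mirroring the commented-out thread.start_new_thread call inside process_page
# below. The function name is illustrative; its use of umisc.trim and
# thread_cntl follows the sequential loop in process_page, and waiting on the
# returned thread_cntl handles is assumed to be the caller's job.
def preprocess_lines_threaded(l2, lines_doc2, onto_basis, purpose, id):
    import thread
    t_threads = []
    ret_ps = []
    for ln_o in lines_doc2:
        # skip blank lines, as process_page does
        if umisc.trim(ln_o) == '':
            continue
        ret_ps.append([])
        t_threads.append(thread_cntl())
        thread.start_new_thread(Identify_pre_process_data,
            (l2, ln_o, onto_basis, purpose, id, t_threads[-1], ret_ps[-1]))
    return t_threads, ret_ps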
def process_page(all_ps, id, purpose, pgs, finish, th, pg_index_rs, all_size_pg, job_index, addresses, result_onto_tree_er, onto_basis):
    # NOTE: conn, usr, result_linked, result_onto_tree_bpm, start_c, umisc,
    # thread_cntl, Identify and traceQ are not parameters here; they are assumed
    # to be module-level names, as in the original flow. The parameter 'id'
    # shadows the builtin but is kept to preserve the call signature.
    ln_o = ''
    endereco_url = all_ps[0]
    # integer division in Python 2: stays 0 until pg_index_rs >= all_size_pg (kept as in the original)
    progress = int(pg_index_rs / all_size_pg)
    l2 = Identify.prepare_layout(id, purpose)
    for lines_doc2_ in all_ps:
        print 'Start page:', pg_index_rs, ' of total:', all_size_pg
        # the original had this try/except commented out and replaced by 'if True:'
        # debugging scaffolding; the intended error handling is restored here
        try:
            endereco_url = lines_doc2_[0]
            lines_doc2 = lines_doc2_[1]
            #============= parse fuzzy ===========================================
            t_threads = []
            ret_ps = []
            indice_linha = 0
            for s in lines_doc2:
                indice_linha += 1
                ln_o = s
                addresses.append(endereco_url)
                if umisc.trim(ln_o) == '':
                    continue
                ret_ps.append([])
                t_threads.append(thread_cntl())
                print 'Preprocessdata in page:', pg_index_rs, ' of total:', all_size_pg, ' line:', indice_linha, ' of:', len(lines_doc2)
                Identify.pre_process_data(l2, ln_o, onto_basis, purpose, id, t_threads[-1], ret_ps[-1])
                print 'Preprocessdata END in page:', pg_index_rs, ' of total:', all_size_pg, ' line:', indice_linha, ' of:', len(lines_doc2)
                #thread.start_new_thread(Identify_pre_process_data, (l2, ln_o, onto_basis, purpose, id, t_threads[-1], ret_ps[-1]))
            #====================================================================
            # run linked rcts
            sentence_index = 0
            for s_ps in ret_ps:
                sentence_index += 1
                print 'Process sentence:', sentence_index, ' of:', len(ret_ps), ' page:', pg_index_rs, ' of total:', all_size_pg
                ir = Identify.resume_process_data(s_ps, onto_basis, purpose, id)
                if ir[0] is not None:
                    # look for an identifier topic on the new element
                    fnd_ident = False
                    for es in ir[0].topicos:
                        if ir[0].es_compare_dt(es, 'identificador'):
                            fnd_ident = True
                    if not fnd_ident:
                        # no identifier of its own: inherit the most recent one from the result tree
                        ind = len(result_onto_tree_er) - 1
                        fond_cs = False
                        while ind >= 0 and not fond_cs:
                            for es2 in result_onto_tree_er[ind].topicos:
                                if ir[0].es_compare_dt(es2, 'identificador'):
                                    ir[0].set_topico_nr(es2)
                                    fond_cs = True
                                    break
                            ind -= 1
                    # skip elements that contain only identifiers (invalid fact element)
                    oth = False
                    for es in ir[0].topicos:
                        if not ir[0].es_compare_dt(es, 'identificador'):
                            oth = True
                    if not oth:
                        continue
                    result_onto_tree_er.append(ir[0])
                    # group search: link earlier elements that reference the same identifier
                    ind = len(result_onto_tree_er) - 1
                    while ind >= 0:
                        if ir[0] != result_onto_tree_er[ind]:
                            for es2 in result_onto_tree_er[ind].topicos:
                                if ir[0].es_compare_dt(es2, 'identificador'):
                                    for top in ir[0].topicos:
                                        if ir[0].compare_dt_depend(conn, usr, purpose, es2, top, ['']):
                                            # found references to the same identifier:
                                            # add both elements to the linked objects
                                            rt = None
                                            fnd_new = False
                                            for k1 in result_linked:
                                                for k2 in k1:
                                                    # bug fix: the original compared k1 == ir[0],
                                                    # which never matches a group against an element
                                                    if k2 == ir[0]:
                                                        fnd_new = True
                                                        rt = k1
                                            if not fnd_new:
                                                result_linked.append([ir[0]])
                                                rt = result_linked[-1]
                                            # append the matched element only once
                                            fnd_new = False
                                            for k2 in rt:
                                                if k2 == result_onto_tree_er[ind]:
                                                    fnd_new = True
                                            if not fnd_new:
                                                rt.append(result_onto_tree_er[ind])
                        ind -= 1
                if ir[1] is not None:
                    result_onto_tree_bpm.append(ir[1])
        except Exception, err:
            print 'Except on process pages:', err, 'pg:', pg_index_rs
        traceQ(progress, id, pg_index_rs, (start_c / 10), endereco_url, 'Processed page:')
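# Hedged usage sketch (inferred from the loop above, not original code): all_ps
# is a list of [endereco_url, lines] pairs, one per page, and the addresses and
# result lists are filled in place. Every name below is an illustrative
# placeholder; doc_id, purpose and onto_basis must come from the real pipeline.
#
#   pages = [['http://example.org/p1', ['first sentence.', 'second sentence.']],
#            ['http://example.org/p2', ['another sentence.']]]
#   addresses, result_er = [], []
#   process_page(pages, doc_id, purpose, None, None, None,
#                1, len(pages), 0, addresses, result_er, onto_basis)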