def runc_layer(layer):
    objects = []
    ref_opcode = ''  # Relaction-oper-opcode: the qualifiers' original opcode when none is specified
    #=====================================================
    def get_o_ob():
        for s in objects:
            if s.name == ob:
                return s
        return None
    #=====================================================
    if True:
        print 'RTS-REF(1):------------'
        r = layer
        for s in r.topicos:
            print 'Topico:', s.dt
            print 'SNS:++++++++++++++++++'
            for s1 in s.sinapses:
                print s1.nr.dt
            print '++++++++++++++++++'
        print 'RTS-REF(1)(END):------------'
    obj_foco = []
    #print 'N tops:', len(layer.topicos)
    for t in layer.topicos:
        tp = t
        nr_t = t.dt[0]
        if nr_t == '':
            continue
        if 'referencial.source' in t.dt:
            sn_dt = ''
            for sn in tp.sinapses:
                if 'indicador' not in sn.nr.dt:
                    for s1 in sn.nr.dt:
                        sn_dt += (s1 + ' ')
            if umisc.trim(sn_dt) != '$$id$$' and umisc.trim(sn_dt) != '':
                print 'referencial.source:', sn_dt
                if len(obj_foco) > 0:
                    lay = obj_foco[0]
                    lay.name += (sn_dt)
                else:
                    lay = mdNeural.mdLayer()
                    lay.name = sn_dt
                    #====
                    objects.append(lay)
                    obj_foco.append(lay)
    rel1 = False
    for t in layer.topicos:
        topico_rsf = t
        #==============================================
        for dt_top in topico_rsf.dt:
            if dt_top in ['referencial'] or ref_opcode in ['refer']:
                for ob1 in obj_foco:
                    ob1.set_topico_nr(topico_rsf)
                break
    print 'Obj.foco.refer:', obj_foco
    return obj_foco
def get_db_pages(usr2):
    resultSet = conn.sql("select pg from know_pages where USERNAME='******' order by i")
    typ = []
    for results in resultSet:
        ts = results[0].read()
        if umisc.trim(ts) != '' and umisc.trim(ts) != '\n' and umisc.trim(ts) != '\r':
            typ.append(ts)
    print 'Reuse pgs:', len(typ)
    return typ
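# Hedged usage sketch (not in the original source): how a caller might
# consume get_db_pages. 'some-user' is a hypothetical argument; the
# redacted USERNAME literal above still gates what the query returns.
def example_reuse_pages():
    for pg in get_db_pages('some-user'):
        # each entry is the non-empty text of one cached page
        print 'cached page, length:', len(pg)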
def pg_open(addresss, th, pages, pgind, ind_emit, start_c):
    print 'Start read page:', addresss
    try:
        for address in addresss:
            lines_doc = []
            links_k2 = []
            pg_add = address  # set up-front so the debug branch can also reference it
            if address != 'debug-url':
                #======================
                #opener = urllib2.build_opener()
                address = urllib.quote(address)
                #url='http://www.mind-net.com/get_Text.php?q='+address
                pg_add = address
                #content = opener.open(url, '' ).read()
                content = call_text(address)
                tmpd = ''
                for d in content:
                    if d == '\n':
                        tmpd = umisc.trim(tmpd)
                        lines_doc.append(tmpd)
                        tmpd = ''
                    else:
                        tmpd += d
                #======================
                #opener = urllib2.build_opener()
                #url='http://www.mind-net.com/get_links.php?q='+address
                #content = opener.open(url, '' ).read()
                content = call_links(address)
                tmpd = ''
                for d in content:
                    if d == '\n':
                        tmpd = umisc.trim(tmpd)
                        links_k2.append(tmpd)
                        tmpd = ''
                    else:
                        tmpd += d
                #============
                pages.append(Task_C(pg_add, lines_doc, links_k2))
                print 'Get content for page:', pgind, ' was finished.Len:', len(lines_doc), ' links count:', len(links_k2)
                pgind += 1
            else:
                for line_deb in entry_doc:
                    lines_doc.append(line_deb)
                pages.append(Task_C(pg_add, lines_doc, links_k2))
                print 'Get content for page:', pgind, ' was finished.Len:', len(lines_doc)
                pgind += 1
        th.finished = True
    except Exception, er:
        print er, '................'
        th.finished = True
def post_object_by_data3p(layer, cenario, usr, termo, foco, posted_objs, senti, l_p_ant):
    if layer.name == '':
        return

    def get_top_level(obj, foc, usr, termo_s):
        rts = []
        resultSet = conn.sqlX(
            "SELECT lev,id_top FROM SEMANTIC_OBJECT_DT3 where OBJECT = ? and TOPICO= ? and USERNAME = ? and UID= ? order by LEV ",
            ([obj, foc, usr, termo_s]))
        for results in resultSet:
            i = results[0]
            id_top = results[1]
            rts.append([i, id_top])
        return rts
    #=======================
    nameo = layer.name
    if umisc.trim(nameo) == '' or umisc.trim(nameo) == '\n':
        if l_p_ant != None:
            nameo = l_p_ant.name
    if umisc.trim(nameo) == '' or umisc.trim(nameo) == '\n':
        return
    fnd_tops = False
    l_p_ant = layer
    print 'POST:LR:', nameo
    print '++------------------------------------------'
    for s in layer.topicos:
        print 'DT:', s.dt
        fnd_tops = True
        for d in s.sinapses:
            print d.nr.dt
    print '++------------------------------------------'
    if not fnd_tops:
        return
    print 'Post-obj:[', nameo, ']'
    no_post_o = False
    for [s, st] in posted_objs:
        if s == nameo and st == senti:
            no_post_o = True
    posted_objs.append([nameo, senti])
    #==========
    #if not no_post_o and len(layer.topicos)>0:
    if not no_post_o:
        sql1 = "insert into SEMANTIC_OBJECT3(username,objeto,cenar,senti) values(?,?,?,?)"
        try:
            conn.sqlX(sql1, ([usr, nameo, cenario, senti]))
        except Exception, err:
            print 'Error posting (OBJECT):', err
def load_pages_know(usr):
    rts = get_db_pages(usr)
    if len(rts) > 0:
        cnt = 0
        for r in rts:
            cnt += 1
            entry_doc.append([Task_C('debug', r), cnt])
    elif len(rts) == 0:
        file = open("c:\\wamp\\www\\Neural\\tst_training.txt")
        cnt = 0
        lsn = ''
        while 1:
            line = file.readline()
            if not line:
                #if umisc.trim(lsn) != '':
                #    cnt+=1
                #    entry_doc.append([ Task_C('debug',lsn),cnt ] )
                break
            #===============================
            s = umisc.trim(line)
            if s == '' or s == '\n':
                cnt += 1
                lsn = lsn.replace('\r', ' ')
                lsn = lsn.replace('\n', ' ')
                entry_doc.append([Task_C('debug', lsn), cnt])
                post_db_page(usr, lsn)
                lsn = ''
            else:
                lsn += (' ' + line)
def run_cmd():
    try:
        Pyro.core.initClient()
        objectName = 'layoutBean'
        hostname = '79.143.185.3'
        port = '28'
        print 'Creating proxy for object', objectName, ' on ', hostname + ':' + port
        if port:
            URI = 'PYROLOC://' + hostname + ':' + port + '/' + objectName
        else:
            URI = 'PYROLOC://' + hostname + '/' + objectName
        print 'The URI is', URI
        proxy = Pyro.core.getProxyForURI(URI)
        #==
        list = proxy.process_cmd('get_page', '', '')
        print 'List.getted!!'
        list = umisc.trim(list)
        if len(list) < 5:
            return
        params = list
        cmd = 'python entry_SemaIndexerStage1.py ' + params
        print cmd
        os.system(cmd)
    except:
        log.exception("ERROR==============")
    time.sleep(2)
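# Hedged note (an assumption, not shown in this excerpt): the trailing
# time.sleep(2) suggests run_cmd is one iteration of a polling worker,
# driven by something like:
#   while True:
#       run_cmd()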
def sep_multiple_data_unkwon(dt):
    dts = parse_t(dt)
    l = len(dts) - 1
    rt = []
    while l >= 0:
        if dts[l] in ['o', 'a', 'os', 'as']:
            rt.insert(0, dts[l])
            break
        rt.insert(0, dts[l])
        l -= 1
    c = ''
    for r in rt:
        c += (' ' + r)
    return umisc.trim(c)
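# Hedged worked example (assumes parse_t splits on whitespace, as the
# callers here suggest): the function walks backwards and keeps the
# suffix that starts at the last Portuguese article ('o','a','os','as'):
#   sep_multiple_data_unkwon('comprar os livros')  ->  'os livros'
#   sep_multiple_data_unkwon('sem artigo aqui')    ->  'sem artigo aqui'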
def pg_open(address, th, page, pgind, ind_emit, start_c):
    if ind_emit > 0:
        pgind = ind_emit
    if start_c > 0:
        #print 'Get content for page:',pgind,'\n'
        print 'Get content for page:', pgind
    else:
        print 'Get content for page:', pgind
    lines_doc = []  # defined before the try so the final print cannot hit an unbound name
    try:
        opener = urllib2.build_opener()
        address = urllib.quote(address)
        url = 'http://www.mind-net.com/get_Text.php?q=' + address
        content = opener.open(url, '').read()
        pg_add = address  # assumed: pg_add was never assigned in the original, which would raise NameError below
        tmpd = ''
        for d in content:
            if d == '\n':
                tmpd = umisc.trim(tmpd)
                if tmpd.find('http://') == -1:  # skip lines that are bare links
                    lines_doc.append(tmpd)
                tmpd = ''
            else:
                tmpd += d
        page.dt1 = pg_add
        page.dt2 = lines_doc
        th.finished = True
    except:
        th.finished = True
    print 'Get content for page:', pgind, ' was finished.Len:', len(lines_doc)
def post_links(endereco, termo, usr, purp):
    try:
        sql1 = "insert into WEB_CACHE_LINKS (URL,TERMO,PURPOSE,USR,PROCESSED) values(%s,%s,%s,%s,'N')"
        if umisc.trim(endereco) != '':
            # %s placeholders already escape their values; the original also ran
            # MySQLdb.escape_string on them, which double-escapes the data
            cursorpostl.execute(sql1, (endereco, termo, purp, usr))
    except:
        pass
def pg_open(address, th, page, pgind, ind_emit, start_c):
    if ind_emit > 0:
        pgind = ind_emit
    if start_c > 0:
        #print 'Get content for page:',pgind,'\n'
        print 'Get content for page:', pgind
    else:
        print 'Get content for page:', pgind
    try:
        lines_doc = []
        pg_add = address  # assumed: keep the original URL; the name was unbound in the original
        if address != 'debug-url':
            opener = urllib2.build_opener()
            address = urllib.quote(address)
            url = 'http://www.mind-net.com/get_Text.php?q=' + address
            content = opener.open(url, '').read()
            tmpd = ''
            for d in content:
                if d == '\n':
                    tmpd = umisc.trim(tmpd)
                    if tmpd.find('http://') == -1:  # skip lines that are bare links
                        lines_doc.append(tmpd)
                    tmpd = ''
                else:
                    tmpd += d
        else:
            for line_deb in entry_doc:
                lines_doc.append(line_deb)
        page.dt1 = pg_add
        page.dt2 = lines_doc
        th.finished = True
    except Exception, er:
        print er, '................'
        th.finished = True
def get_db_pages(usr2, pg_ex, connc):
    def fecha_pagina(uid):
        # marks the page document as processed ('S')
        print 'Close MSG:', uid
        s = w_cache3.find_one({'doc_id': uid})  # find() returns a cursor; find_one() returns the document itself
        s[u'PROCESSED'] = 'S'
        w_cache3.update({'_id': s['_id']}, s)

    def remote_f():
        print 'Getting remote-pages...'
        return proxy.get_pages('', usr2)

    if RemoteL:
        return remote_f()
    print 'PG_EX', pg_ex, len(pg_ex)
    pgs_exs = pg_ex.split(',')
    #resultSet = connc.sql ("select pg,i,title from web_cache3 where USR='******' and i in( "+pg_ex+" ) order by i")
    resultSet = []
    for p1 in pgs_exs:
        rg = None
        for d in w_cache3.find({'doc_id': p1}):
            rg = d
        if rg != None:
            try:
                rtc = str(rg[u'pg'])
                rtc2 = str(rg[u'title'])
                resultSet.append([rtc.encode('latin-1'), p1, rtc2.encode('latin-1')])
            except:
                print 'Error.get.pg:', rg
                log.exception("")
                try:
                    resultSet.append([rg[u'PG'].encode('latin-1'), p1, rg[u'TITLE'].encode('latin-1')])
                except:
                    print 'Error.get.pg(2):', rg
                    log.exception("")
    typ = []
    print 'Collect pg:', pg_ex
    #===============================================
    #typ.append(['O perfil da empresa no Twitter foi criado em 20 de Fevereiro de 2008.',35835 ])
    #return typ
    #================================================
    for [ts, ids, ids2] in resultSet:
        if ids2 == None:
            ids2 = ''
        if umisc.trim(ids2) != '':
            ts = (ids2 + ': ' + ts)
        #if umisc.trim(ts) != '' and umisc.trim(ts) != '\n' and umisc.trim(ts) != '\r':
        typ.append([ts, ids])
        if re_post and not fs_teste:
            fecha_pagina(ids)
        print 'Read page', ids  #,ts
    print 'Reuse pgs:', len(typ)
    return typ
def post_cn(its):
    f = open("/Neural/txt_logs", "w")
    print 'POST.LEN:', len(its)
    for [PG, PROCESSED, TERM, USR, PURPOSE, URL_ICON, URL_PICTURE, ID_USR,
         NAME_USR, STORY, TITLE, DOC_ID, TP, PHONE, STREET, CITY, COUNTRY,
         ZIP, LATITUDE, LONGITUDE, TPS, URL] in its:
        c1 = str(PG)
        if umisc.trim(c1) != '':
            # one pipe-delimited record per non-empty page
            f.write('|'.join(str(x) for x in
                             [PG, PROCESSED, TERM, USR, PURPOSE, URL_ICON,
                              URL_PICTURE, ID_USR, NAME_USR, STORY, TITLE,
                              DOC_ID, TP, PHONE, STREET, CITY, COUNTRY, ZIP,
                              LATITUDE, LONGITUDE, TPS, URL]) + '|' + '\n')
    f.close()
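# Hedged format note: each record written by post_cn is 22 '|'-separated
# fields with a trailing '|', e.g. (schematic, not real data):
#   <PG>|<PROCESSED>|<TERM>|...|<TPS>|<URL>|
# A reader (hypothetical sketch; breaks if a field itself contains '|'):
#   fields = line.rstrip('\n').rstrip('|').split('|')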
def get_aliases_ob():
    str_ret = []
    for ir in layer_processes.lrs:
        print 'get_aliases_ob()->ir(1):', ir
        #if ir != None:
        print 'lr:', ir.name
        for topico in ir.topicos:
            if len(topico.dt) > 0:
                topicodt = topico.dt
                if 'identificador' in topicodt or 'realid' in topicodt or 'realid2' in topicodt or 'object' in topicodt:
                    dtk = ''
                    for p in topico.sinapses:
                        for dts1 in p.nr.dt:
                            dtk += ' ' + umisc.trim(dts1)
                    if umisc.trim(dtk) != '':
                        print 'Collect.element:', dtk
                        str_ret.append(umisc.trim(dtk))
    return str_ret
def post_pagina(endereco, conteudo_i, termo, usr, purp):
    try:
        conteudo = ''
        for l in conteudo_i:
            conteudo += (l + '\n')
        if umisc.trim(conteudo) != '':
            sql1 = "insert into WEB_CACHE (URL,PG,TERMO,PURPOSE,USR,SEMA_RESUME) values(%s,%s,%s,%s,%s,'')"
            # %s placeholders already escape their values; the original also ran
            # MySQLdb.escape_string on them, which double-escapes the data
            cursorpostp.execute(sql1, (endereco, conteudo, termo, purp, usr))
    except:
        pass
def post_object_by_data_es(layer, usr):
    nameo = layer.name
    nameo = umisc.trim(nameo)
    clear_obj(usr, nameo)
    print 'Post LR:', nameo, ', len:', len(nameo), ', usr:', usr
    # NOTE: a span of this function was redacted ('******') in the source.
    # The four lines below are an assumed reconstruction based on the
    # sibling post_* functions in this file.
    uid = nameo   # assumed
    level = 1     # assumed
    for tp in layer.topicos:   # assumed
        tp_Dt = ''             # assumed
        for d in tp.dt:
            tp_Dt += d
        tp_name = tp_Dt
        for sn in tp.sinapses:
            sn_dt = ''
            for s1 in sn.nr.dt:
                sn_dt += s1
            sql1 = "insert into " + mdTb.table_dt + "(username,\"UID\",dt,topico,LEV,SIN,ID_TOP) values(?,?,?,?,?,'Composicao',1)"
            try:
                conn.sqlX(sql1, ([usr, uid, sn_dt, tp_Dt, level]))
            except:
                print 'Error posting:', nameo, tp_Dt, sn_dt
                log.exception("--------------------------")
            #==========
            post_nr(usr, sn.nr, level + 1)
        #==========
    #===============================================
    post_nr(usr, tp)
    #===============================================
    for lnk in layer.links:
        sqlc = 'insert into ' + mdTb.table_relaction + '(OBJ_ORIG,OBJ_DEST,OPCODE,USERNAME,FOCO,FOCO_D,\"UID\") values(?,?,?,?,?,?,?)'
        #====================
        def get_nr_dts1(nrs):
            d = ''
            for nr in nrs:
                for n in nr.dt:
                    d += n
                d += ','
            return d
        #====================
        foco_o = get_nr_dts1(lnk.foco_o)
        foco_d = get_nr_dts1(lnk.foco_d)
        conn.sqlX(sqlc, ([nameo, lnk.lr.name, lnk.opcode, usr, foco_o, foco_d, 0]))
        #===============
        post_object_by_data_es(lnk.lr, usr)
def get_db_pages(usr2):
    def remote_f():
        print 'Getting remote-pages...'
        return proxy.get_pages('', usr2)

    if RemoteL:
        return remote_f()
    resultSet = conn.sql("select pg,title,story,i from web_cache where USERNAME='******' order by i")
    #resultSet = conn.sql ("select pg,title,story,i from web_cache where USERNAME='******' and i = 96 order by i")
    typ = []
    for results in resultSet:
        ts = results[0].read()
        ids = results[1]
        if umisc.trim(ts) != '' and umisc.trim(ts) != '\n' and umisc.trim(ts) != '\r':
            typ.append([ts, ids])
        if len(typ) > 100:
            break
    print 'Reuse pgs:', len(typ)
    return typ
def get_aliases_ob():
    str_ret = []
    for ir in layer_processes.lrs:
        print 'get_aliases_ob()->ir(1):', ir
        for topico in ir.topicos:
            if len(topico.dt) > 0:
                topicodt = topico.dt
                if 'identificador' in topicodt or 'realid' in topicodt or 'realid2' in topicodt or 'object' in topicodt:
                    tmpcs = ''
                    for p in topico.sinapses:
                        for dts1 in p.nr.dt:
                            tmpcs += ' ' + dts1
                    str_ret.append(umisc.trim(tmpcs))
    return str_ret
def post_object_by_data_es(layer, usr):
    cenario = 0
    senti = 0
    #def post_object_by_data3p( , , ,,, ,):
    nameo = umisc.trim(layer.name)  # assumed: nameo was never assigned in the original (likely lost to the '******' redaction)
    print 'Post LR:', nameo, ', len:', len(nameo), ', usr:', usr
    print 'Post layer:', nameo
    fnd_tops = False
    geral_uuid = cenario
    print 'POST:LR:', nameo
    print '++------------------------------------------'
    for s in layer.topicos:
        print 'DT:', s.dt
        fnd_tops = True
        for d in s.sinapses:
            print d.nr.dt
    print '++------------------------------------------'
    if not fnd_tops:
        return
    ky1 = nameo + ' ' + str(cenario)
    nameo = ky1
    print 'Post-obj:[', nameo, ']'
    #==========
    #if not no_post_o and len(layer.topicos)>0:
    print 'Insert-OBJ:'
    #==
    no_post_o = False  # assumed: the duplicate check seen in post_object_by_data3p is absent here
    if not no_post_o:
        #sql1="insert into SEMANTIC_OBJECT3(username,objeto,cenar,senti) values(?,?,?,?)"
        try:
            cols = {
                "username": usr,
                "objeto": ky1,
                "cenar": str(cenario),
                "senti": str(senti)  # original had "sento", a likely typo for "senti"
            }
            tb_object.insert(ky1, cols)
            #conn.sqlX (sql1,([usr,nameo,cenario,senti]))
        except Exception, err:
            print 'Error posting (OBJECT):', err
def process_page(lines_doc2, id, purpose, pgs, finish, th):
    ln_o = ''
    try:
        l2 = Identify.prepare_layout(id, purpose)
        if True:
            for s in lines_doc2:
                ln_o += ' ' + s
        if umisc.trim(ln_o) == '':
            finish.finished = True
            return
        ir = Identify.process_data(l2, ln_o, onto_basis, purpose, id, th)
        if ir[0] != None:
            result_onto_tree_er.append(ir[0])
        if ir[1] != None:
            result_onto_tree_bpm.append(ir[1])
        finish.finished = True
    except:
        finish.finished = True
    print 'Thread ', pgs, ' was finished.', 'Len:', len(ln_o), ' process:', start_c / 10
def collect_vitrines():
    import pycassa
    from pycassa.pool import ConnectionPool
    from pycassa.columnfamily import ColumnFamily
    from pycassa import index
    pool2 = ConnectionPool('MINDNET', ['79.143.185.3:9160'], timeout=100000)
    tab2 = pycassa.ColumnFamily(pool2, 'cache_products')
    pub = pycassa.ColumnFamily(pool2, 'to_posting2')
    wb2 = pycassa.ColumnFamily(pool2, 'web_cache1')  # place to index
    #=================================
    counter = 0
    for ky, col in pub.get_range():
        prods = col['id_product'].decode('hex')
        rc = tab2.get(prods)
        from_id3 = col['from_id']
        from_id3 = from_id3.replace('\'', '')
        from_id3 = from_id3.replace('[', '')
        from_id3 = from_id3.replace(']', '')
        from_id2 = from_id3.split(',')
        from_id = ''
        for ifr in from_id2:
            ifr = umisc.trim(ifr)
            if len(ifr) > 1:
                # keep the first candidate whose first two characters are digits
                if ifr[0] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] and \
                   ifr[1] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']:
                    from_id = ifr
                    break
        counter += 1
        col['id_post'] = str(counter)
        url_lomadee = rc['lomadee_url']
        pub.insert(ky, col)
        prod = rc['cod_p']
        rccc = wb2.get(prods.encode('hex'))
        rccc['lomadee_url'] = rc['lomadee_url']
        #
        gerate_insert_client(from_id, prod, rccc)
    #============================
    return products_client
def process_p(arrpg1, dm):
    print 'pages ok. init process ...'
    thst = []
    indc = 0
    for pg in arrpg1:
        indc += 1
        [u, uip] = pg
        if umisc.trim(u) == '':
            continue
        thst.append(thread_cntl())
        params = get_feeds(u, 0, '', None, [], uip, u)
        #==========================
        t = Thread(target=run_cmd, args=(params, thst[len(thst) - 1]))
        t.start()
    while True:
        fndac = False
        for t in thst:
            if not t.finished:
                fndac = True
                time.sleep(1)
        if not fndac:
            break
def pg_open(addresss, th, pages, pgind, ind_emit, start_c):
    try:
        for address in addresss:
            lines_doc = []
            pg_add = address  # assumed: pg_add was never assigned in the original
            if address != 'debug-url':
                opener = urllib2.build_opener()
                address = urllib.quote(address)
                print 'Open page:', address
                url = 'http://www.mind-net.com/get_Text.php?q=' + address
                content = opener.open(url, '').read()
                tmpd = ''
                for d in content:
                    if d == '\n':
                        tmpd = umisc.trim(tmpd)
                        if tmpd.find('http://') == -1:  # skip lines that are bare links
                            lines_doc.append(tmpd)
                        tmpd = ''
                    else:
                        tmpd += d
                #============
                pages.append(Task_C(pg_add, lines_doc))
                print 'Get content for page:', pgind, ' was finished.Len:', len(lines_doc)
                pgind += 1
            else:
                for line_deb in entry_doc:
                    lines_doc.append(line_deb)
                pages.append(Task_C(pg_add, lines_doc))
                print 'Get content for page:', pgind, ' was finished.Len:', len(lines_doc)
                pgind += 1
        th.finished = True
    except Exception, er:
        print er, '................'
        th.finished = True
def __init__(self, prefixo, sufixo, define, returnsa):
    prefix = []
    defs = []
    sfx = []
    #==================================
    for p in prefixo:
        prefix.append(latinupper(p))
    #==================================
    for d in define:
        if umisc.trim(d) != "":
            defs.append(latinupper(d))
    #==================================
    for sf in sufixo:
        sfxc = []
        for sf2 in sf:
            sfxc.append(latinupper(sf2))
        sfx.append(sfxc)
    #==================================
    self.prefixo = prefix
    self.define = defs
    self.sufixo = sfx
    self.returns = returnsa
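# Hedged usage sketch: the constructor upper-cases every prefix/define
# entry and each nested suffix list via latinupper (a helper from this
# codebase). Assuming the enclosing class is called Rule (its real name
# is not shown in this excerpt):
#   r = Rule(['de', 'da'], [['ção', 'ções']], ['verbo'], True)
#   # r.prefixo == ['DE', 'DA'], r.sufixo == [['ÇÃO', 'ÇÕES']],
#   # r.define == ['VERBO'], r.returns == True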
def get_filter_in_objs(obj, sinapses_consid1, dfin1, canditates, firp):
    resultSet22 = []
    for mn in min_purposes:
        if umisc.trim(mn) == '':
            continue
        mn = parse_each_topico2(mn, dfin1)
        #print 'start.min.p:',mn
        c2 = mdTb.valida_topico31_2(mn, usr, obj, True, '', 0, 0, sinapses_consid1)
        #print 'min.p:',mn ,c2
        if len(c2) == 0:
            return []
        for c1 in c2:
            if c1 in canditates or firp:
                canditates.append(c1)
        firp = False
    canditates2 = canditates
    canditates = []
    for c in canditates2:
        if not c in canditates:
            canditates.append(c)
    for c in canditates:
        #
        r = [[c, mg.tb_get([tb_object31], c)]]
        resultSet22.append(r)
    return resultSet22
def get_by_keyword(is2):
    # looks for a few keywords in order to extract the 'samples' used to
    # calibrate and train the fuzzy processor
    isd = []
    cnc = False
    for ch in is2:
        try:
            cnc = False
            try:
                rows = w_cache.get(ch)
            except:
                cnc = True
            if cnc:
                continue
            if True:
                #============================
                print 'Print pg:', ch
                #=================
                PG = rows[u'pg']
                PROCESSED = rows[u'processed']
                TERMO = rows[u'termo']
                USR = rows[u'usr']
                PURPOSE = rows[u'purpose']
                URL_ICON = rows[u'url_icon']
                URL_PICTURE = rows[u'url_picture']
                ID_USR = float(rows[u'id_usr'])
                NAME_USR = rows[u'name_usr']
                STORY = rows[u'story']
                TITLE = rows[u'title']
                DOC_ID = rows[u'doc_id']
                TP = rows[u'tp']
                PHONE = ''
                STREET = ''
                CITY = ''
                COUNTRY = ''
                ZIP = ''
                LATITUDE = ''
                LONGITUDE = ''
                TPS = rows['tps']
                URL = rows['url']
                #==========
                if PG is None: PG = ''
                if URL_ICON is None: URL_ICON = ''
                if URL_PICTURE is None: URL_PICTURE = ''
                if STORY is None: STORY = ''
                if TITLE is None: TITLE = ''
                if URL is None: URL = ''
                words = tokeniz(PG)
                fnd = False
                fnd2 = False
                # discard friendship/like notifications and bare links
                if 'are now friends' in PG:
                    fnd2 = True
                elif 'is now friends with' in PG:
                    fnd2 = True
                elif PG[:7] == 'http://':
                    fnd2 = True
                elif 'likes' in PG:
                    fnd2 = True
                elif '{like}' in PG:
                    fnd2 = True
                #===
                # keep posts containing purchase-intent stems (Portuguese)
                for w in words:
                    if any(stem in w for stem in
                           ['quer', 'precis', 'poderia', 'pode', 'podi',
                            'gostar', 'pensand', 'comprar', 'adquirir',
                            'pens', 'pegar', 'encontr', 'indicar']):
                        fnd = True
                #================================
                if umisc.trim(PG) == '':
                    fnd = False
                if fnd and not fnd2:
                    isd.append([PG, PROCESSED, TERMO, USR, PURPOSE, URL_ICON,
                                URL_PICTURE, ID_USR, NAME_USR, STORY, TITLE,
                                DOC_ID, TP, PHONE, STREET, CITY, COUNTRY, ZIP,
                                LATITUDE, LONGITUDE, TPS, URL])
                # delete the item, keeping only I,DOC_ID in the processed table so
                # the reprocessing step no longer considers these documents
                #===================================================================
                #I=0
                #conn.sqlX('insert into PROC_DS (ID,DOC_ID) values(?,?)',[I,ch])
                proc_ds.insert(ch, {'ch': ch})
                #======================================================================
                w_cache.remove(ch)
        except:
            log.exception("")
            conn.rollback()
            return []
    conn.commit()
    return isd
def get_ontology_ponderate(aliases, min_purposes, max_purposes, usr, dfin, sinapses_consid):
    # min_purposes = mandatory, max_purposes = ideal maximum
    rts = []
    firp = True
    resultSet2 = []
    canditates = []
    alias = aliases
    if alias == None:
        alias = '%'
    #======================================================================================
    def get_filter_in_objs(obj, sinapses_consid1, dfin1, canditates, firp):
        resultSet22 = []
        for mn in min_purposes:
            if umisc.trim(mn) == '':
                continue
            mn = parse_each_topico2(mn, dfin1)
            #print 'start.min.p:',mn
            c2 = mdTb.valida_topico31_2(mn, usr, obj, True, '', 0, 0, sinapses_consid1)
            #print 'min.p:',mn ,c2
            if len(c2) == 0:
                return []
            for c1 in c2:
                if c1 in canditates or firp:
                    canditates.append(c1)
            firp = False
        canditates2 = canditates
        canditates = []
        for c in canditates2:
            if not c in canditates:
                canditates.append(c)
        for c in canditates:
            #
            r = [[c, mg.tb_get([tb_object31], c)]]
            resultSet22.append(r)
        return resultSet22
    #======================================================================================
    if aliases == None or umisc.trim(aliases) == '':
        firp = True
        for mn in min_purposes:
            if umisc.trim(mn) == '':
                continue
            mn = parse_each_topico2(mn, dfin)
            #print 'start.min.p:',mn
            c2 = mdTb.valida_topico31(mn, usr, None)
            #print 'min.p:',mn ,c2
            if len(c2) == 0:
                return []
            for c1 in c2:
                if c1 in canditates or firp:
                    canditates.append(c1)
            firp = False
        canditates2 = canditates
        canditates = []
        for c in canditates2:
            if not c in canditates:
                canditates.append(c)
        for c in canditates:
            #
            try:
                r = [[[c, mg.tb_get([tb_object31], c)]]]
            except:
                try:
                    r = [[[c, mg.tb_get([tb_object1], c)]]]
                except:
                    #r=[[c,tb_object.get(c)]]
                    pass  # temporary tb_object cache; there must be no weightings in this cache
            resultSet2.append(r)
    else:
        #print 'Collect->get(2):',alias,usr
        #
        ########################################################################
        #if mdTb.Zeus_Mode:
        #    r=[[alias,tb_object1.get(alias)]]
        #else:
        #    r=[[alias,tb_object31.get(alias)]]
        r = get_filter_in_objs(alias, sinapses_consid, dfin, canditates, firp)
        resultSet2.append(r)
    #=============================================
    for resultSet23 in resultSet2:
        for resultSet in resultSet23:
            for key, results in resultSet:
                i = results[u'objeto']
                uid = key
                #====
                avaliable_objs = []
                #===--------------------------------------
                #print 'get_object_by_data29()->(',i,',',usr,',',max_purposes,')'
                ant_z = mdTb.Zeus_Mode
                mdTb.Zeus_Mode = False
                [obj_principal, ratting] = get_object_by_data29(i, usr, max_purposes)
                obj_principal.get_links('')
                mdTb.Zeus_Mode = ant_z
                #if len(obj_principal.topicos) > 0 :
                rts.append([ratting, obj_principal])
    rts.sort()
    #===
    return rts
def post_object_by_data3p(layer, cenario, usr, termo, foco, posted_objs, senti, l_p_ant):
    if layer.name == '':
        layer.name = 'undef'

    def get_top_level(obj, foc, usr, termo_s):
        rts = []
        cl1 = index.create_index_expression(column_name='OBJECT', value=obj)
        cl2 = index.create_index_expression(column_name='TOPICO', value=foc)
        cl3 = index.create_index_expression(column_name='USERNAME', value=usr)
        cl4 = index.create_index_expression(column_name='UID', value=termo_s)
        clausec = index.create_index_clause([cl1, cl2, cl3, cl4], count=1000000)
        rest = tb_object_dt.get_indexed_slices(clausec)
        #
        #for results in resultSet:
        for kl, cols in rest:
            i = cols[u'lev']
            id_top = cols[u'id_top']
            rts.append([i, id_top])
        return rts
    #=======================
    nameo = layer.name
    print 'Post layer:', nameo
    if umisc.trim(nameo) == '' or umisc.trim(nameo) == '\n':
        if l_p_ant != None:
            nameo = l_p_ant.name
    if umisc.trim(nameo) == '' or umisc.trim(nameo) == '\n':
        return
    fnd_tops = False
    l_p_ant = layer
    geral_uuid = cenario
    print 'POST:LR:', nameo
    print '++------------------------------------------'
    for s in layer.topicos:
        print 'DT:', s.dt
        fnd_tops = True
        for d in s.sinapses:
            print d.nr.dt
    print '++------------------------------------------'
    if not fnd_tops:
        return
    ky1 = nameo + ' ' + str(cenario)
    nameo = ky1
    #
    #nameo = filter(lambda x: x in string.printable, nameo)
    nameo = nameo.replace('\n', '')
    nameo = nameo.replace('\t', '')
    nameo = nameo.replace('.', '')
    nameo = nameo.strip('\n')
    nameo = nameo.strip('\r')
    nameo = nameo.strip('\t')
    # collapse runs of blanks (the original chained several replace('  ', ' ')
    # calls whose multi-space literals were collapsed in this dump)
    while '  ' in nameo:
        nameo = nameo.replace('  ', ' ')
    ky1 = nameo
    #
    print 'Post-obj:[', nameo, ']'
    # check the tables for an existing object and load its topics before posting the new ones
    try:
        print 'get.object.reuse:'
        import mdTb
        layer_ant = mdTb.get_object_by_data(nameo, nameo)
        print 'get.object.reuse.return:', layer_ant
        if layer_ant != None:
            print 'reuse layer(', len(layer.topicos), '):', nameo,
            #layer.dump_layer()
            tps_ant_p = layer.topicos
            layer.topicos = []
            for stop in layer_ant.topicos:
                if len(stop.dt) > 0:
                    if stop.dt[0].lower() == 'identificador':
                        continue
                layer.topicos.append(stop)
            for atop in tps_ant_p:
                layer.topicos.append(atop)
            print 'result:', len(layer.topicos)
            #layer.dump_layer()
    except:
        #log.exception("Error.get.object")
        pass
    #
    no_post_o = False
    for [s, st] in posted_objs:
        if s == nameo and st == senti:
            no_post_o = True
    posted_objs.append([nameo, senti])
    #==========
    #if not no_post_o and len(layer.topicos)>0:
    def post_alldt(arr):
        #=======================
        b = tb_object_dt.batch(queue_size=len(arr))
        for k, cols in arr:
            b.insert(str(k), cols)
        b.send()
    #
    def post_nr(uid, cnt, arr1, usr, tp, level=1, id_top=1, just_sin=False):
        try:
            if not just_sin:
                tp_Dt = ''
                try:
                    for d in tp.dt:
                        if type(d) == type([]):
                            tp_Dt += d[0]
                        else:
                            tp_Dt += d
                except Exception, e:
                    print 'Err:-nr.post(2):', tp.dt, '->', e
                tp_name = tp_Dt
                if len(tp.sinapses) == 0:
                    ##UID,topico,LEV,sin,dt,id_top,username
                    kyl1 = uid + '|' + str(cnt.value())
                    it = {"UID": uid, "topico": tp_Dt, "LEV": "1", "sin": '',
                          "datach": '', "id_top": str(id_top), "username": usr,
                          "cnt": str(cnt.value())}
                    arr1.append([kyl1, it])
                    cnt.inc()
                else:
                    # (snippet truncated in the source; the sinapses branch continues in the fragment below)
def get_db_pages(usr2, pg_ex, connc):
    def remote_f():
        print 'Getting remote-pages...'
        return proxy.get_pages('', usr2)

    def clean_text(pg):
        # normalizes whitespace: strips tabs/newlines and collapses blank runs
        # (the original repeated replace('  ', ' ') eight times per field; the
        # multi-space literals were collapsed in this dump)
        pg = pg.replace('\n', '')
        pg = pg.replace('\t', '')
        pg = pg.strip('\n')
        pg = pg.strip('\r')
        pg = pg.strip('\t')
        while '  ' in pg:
            pg = pg.replace('  ', ' ')
        return pg

    if RemoteL:
        return remote_f()
    print 'PG_EX', pg_ex, len(pg_ex)
    pgs_exs = pg_ex.split(',')
    #resultSet = connc.sql ("select pg,i,title from web_cache3 where USR='******' and i in( "+pg_ex+" ) order by i")
    resultSet = []
    #==========================================================================================================================================
    # if tests are implemented, add them to resultSet
    #resultSet=['','']
    #==========================================================================================================================================
    if len(resultSet) == 0:
        for p1 in pgs_exs:
            rg = w_cache3.get(p1)
            try:
                # upper-case column names first, lower-case as fallback
                pagina = clean_text(rg['PG'])
                titulo = clean_text(rg[u'TITLE'].replace('.', ' '))
                resultSet.append([pagina, p1, titulo])
            except:
                pagina = clean_text(rg['pg'])
                titulo = clean_text(rg[u'title'].replace('.', ' '))
                resultSet.append([pagina, p1, titulo])
    typ = []
    print 'Collect pg:', pg_ex
    #===============================================
    #typ.append(['O perfil da empresa no Twitter foi criado em 20 de Fevereiro de 2008.',35835 ])
    #return typ
    #================================================
    for [ts, ids, ids2] in resultSet:
        if ids2 == None:
            ids2 = ''
        if umisc.trim(ids2) != '':
            ts = (ids2 + ': ' + ts)
        #if umisc.trim(ts) != '' and umisc.trim(ts) != '\n' and umisc.trim(ts) != '\r':
        typ.append([ts, ids])
        print 'Read page', ids  #,ts
    print 'Reuse pgs:', len(typ)
    return typ
                # (fragment: continuation of post_nr's else branch from post_object_by_data3p above)
                        tp_Dt += d
                except:
                    print 'Err:-nr.post(2):', tp.dt
                #=================
                for sn in tp.sinapses:
                    sn_dt = ''
                    try:
                        for s1 in sn.nr.dt:
                            if type(s1) == type([]):
                                sn_dt += s1[0]
                            else:
                                sn_dt += s1
                    except:
                        print 'Err:-nr.post:', sn.nr.dt
                    #sql1="insert into SEMANTIC_OBJECT_DT3(UID,dt,topico,LEV,sin,id_top,username) values(?,?,?,?,?,?,?)"
                    if umisc.trim(sn.opcode) == '':
                        sn.opcode = 'Relaction-oper-opcode'
                    #====================================================
                    kyl1 = uid + '|' + str(cnt.value())
                    it = {"UID": uid, "topico": tp_Dt, "LEV": str(level),
                          "sin": sn.opcode, "datach": sn_dt,
                          "id_top": str(id_top), "username": usr,
                          "cnt": str(cnt.value())}
                    arr1.append([kyl1, it])
def post_object_by_data3p(layer, cenario, usr, termo, foco, posted_objs, senti, l_p_ant):
    if layer.name == '':
        layer.name = 'undef'

    def get_top_level(obj, foc, usr, termo_s):
        rts = []
        cl1 = index.create_index_expression(column_name='OBJECT', value=obj)
        cl2 = index.create_index_expression(column_name='TOPICO', value=foc)
        cl3 = index.create_index_expression(column_name='USERNAME', value=usr)
        cl4 = index.create_index_expression(column_name='UID', value=termo_s)
        clausec = index.create_index_clause([cl1, cl2, cl3, cl4], count=1000000)
        rest = tb_object_dt.get_indexed_slices(clausec)
        #
        #for results in resultSet:
        for kl, cols in rest:
            i = cols[u'lev']
            id_top = cols[u'id_top']
            rts.append([i, id_top])
        return rts
    #=======================
    nameo = layer.name
    print 'Post layer:', nameo
    if umisc.trim(nameo) == '' or umisc.trim(nameo) == '\n':
        if l_p_ant != None:
            nameo = l_p_ant.name
    if umisc.trim(nameo) == '' or umisc.trim(nameo) == '\n':
        return
    fnd_tops = False
    l_p_ant = layer
    geral_uuid = cenario
    print 'POST:LR:', nameo
    print '++------------------------------------------'
    for s in layer.topicos:
        print 'DT:', s.dt
        fnd_tops = True
        for d in s.sinapses:
            print d.nr.dt
    print '++------------------------------------------'
    if not fnd_tops:
        return
    ky1 = nameo + ' ' + str(cenario)
    nameo = ky1
    print 'Post-obj:[', nameo, ']'
    no_post_o = False
    for [s, st] in posted_objs:
        if s == nameo and st == senti:
            no_post_o = True
    posted_objs.append([nameo, senti])
    #==========
    #if not no_post_o and len(layer.topicos)>0:
    def post_alldt(arr):
        #=======================
        b = tb_object_dt.batch(queue_size=len(arr))
        for k, cols in arr:
            b.insert(str(k), cols)
        b.send()
    #
    def post_nr(uid, cnt, arr1, usr, tp, level=1, id_top=1, just_sin=False):
        try:
            if not just_sin:
                tp_Dt = ''
                try:
                    for d in tp.dt:
                        if type(d) == type([]):
                            tp_Dt += d[0]
                        else:
                            tp_Dt += d
                except Exception, e:
                    print 'Err:-nr.post(2):', tp.dt, '->', e
                tp_name = tp_Dt
                if len(tp.sinapses) == 0:
                    ##UID,topico,LEV,sin,dt,id_top,username
                    kyl1 = uid + '|' + str(cnt.value())
                    it = {"UID": uid, "topico": tp_Dt, "LEV": "1", "sin": '',
                          "datach": '', "id_top": str(id_top), "username": usr,
                          "cnt": str(cnt.value())}
                    arr1.append([kyl1, it])
                    cnt.inc()
                else:
                    # (snippet truncated in the source)