def writeRanks(ids):
    """Add a "rank" fact for every row produced by the ``stmt`` query.

    ids -- mapping queried with ``ids.get(key, '0')``.  The key is the
           first column of each query row; rows whose key is missing
           from the mapping are given the string '0'.

    All facts share one transaction id obtained from ``Tx(...).next()``.
    The Fact writer is created with ``autocommit=False`` and the write
    connection is committed once, after the last row has been handled.
    """
    # NOTE(review): three separate connections are opened -- one feeds
    # the query cursor, one supplies the tx id, one receives the writes.
    # Presumably this keeps the open cursor apart from the uncommitted
    # inserts; confirm before collapsing them into a single connection.
    with newman_connector() as query_cnx, \
            newman_connector() as tx_cnx, \
            newman_connector() as facts_cnx:
        with execute_query(query_cnx.conn(), stmt) as qry:
            txid = Tx(tx_cnx.conn()).next()
            # A parenthesised single argument prints exactly the same
            # text under the Python 2 print statement.
            print("tx: %s" % txid)

            rank_writer = Fact(facts_cnx.conn(), autocommit=False)
            print("assigning ranks")
            for row in qry.cursor():
                address = row[0]
                rank_writer.addFact(address, "email_addr", "rank",
                                    ids.get(address, '0'), txid)

            print("commit")
            facts_cnx.commit()
def writeRanks(ids):
    """Write one "rank" fact per row returned by the ``stmt`` query.

    ids -- mapping from the first column of a query row to its rank;
           looked up with ``ids.get(key, '0')`` so unknown keys fall
           back to the string '0'.

    A single transaction id (``Tx(...).next()``) tags every fact.  The
    Fact writer is built with ``autocommit=False``; the write connection
    is committed explicitly once the loop has finished.
    """
    # The four context managers are entered left to right and exited in
    # reverse, the same order as the nested form.
    # NOTE(review): the reader, tx and writer connections are kept
    # distinct here; the reason is not visible in this function, so
    # verify before merging them.
    with newman_connector() as reader, \
            newman_connector() as tx_source, \
            newman_connector() as writer, \
            execute_query(reader.conn(), stmt) as qry:
        txid = Tx(tx_source.conn()).next()
        print("tx: %s" % txid)

        facts = Fact(writer.conn(), autocommit=False)
        print("assigning ranks")
        for record in qry.cursor():
            subject = record[0]
            rank = ids.get(subject, '0')
            facts.addFact(subject, "email_addr", "rank", rank, txid)

        print("commit")
        writer.commit()
node['community'] = community_name #output format #NODE\tCOMMUNITY # for node in nodes: # print "{}\t{}".format(node['name'], node['community']) count = counter(1) with newman_connector() as read_cnx, newman_connector() as write_cnx: txid = Tx(read_cnx.conn()).next() print "tx: %s" % txid facts = Fact(write_cnx.conn(), autocommit=False) print "assigning communities" for node in nodes: email_addr, community_id = node['name'], node['community'] facts.addFact(email_addr, "email_addr", "community", community_id, txid) facts.addFact(email_addr, "email_addr", "group_id", next(count), txid) print "commit" write_cnx.commit() txid = Tx(read_cnx.conn()).next() print "tx: %s" % txid print "assign community ids" stmt = ( " insert into facts (subject, schema_name, predicate, obj, tx) " " select f.subject, f.schema_name, 'community_id', f2.obj, %s " " from facts f join facts f2 on f.obj = f2.subject " " where f.schema_name = 'email_addr' " " and f.predicate = 'community' "
subject, body, str(tosize), str(ccsize), str(bccsize), str(attachsize), attach, bodysize, "", count) outrow = zip(headers, [ str(num), threadid, dir, category, utc_date, fromemail, toemail, ccemail, bccemail, subject, body, str(tosize), str(ccsize), str(bccsize), str(attachsize), attach, bodysize, "", count ]) #line number fact.addFact(num, "email", "line_num", count, tx) #ingest email in to stage table for header, val in outrow: #do not bother with empty string if val: if header == "body": pass else: fact.addFact(num, "email", header, val, tx) #ingest individual to, cc, bcc into stage table for header, addrs in (("to", toemail), ("cc", ccemail), ("bcc", bccemail)): for addr in addrs.split(';'): if addr:
node['community'] = community_name #output format #NODE\tCOMMUNITY # for node in nodes: # print "{}\t{}".format(node['name'], node['community']) count = counter(1) with newman_connector() as read_cnx, newman_connector() as write_cnx: txid = Tx(read_cnx.conn()).next() print "tx: %s" % txid facts = Fact(write_cnx.conn(), autocommit=False) print "assigning communities" for node in nodes: email_addr, community_id = node['name'], node['community'] facts.addFact(email_addr, "email_addr", "community", community_id, txid) facts.addFact(email_addr, "email_addr", "group_id", next(count), txid) print "commit" write_cnx.commit() txid = Tx(read_cnx.conn()).next() print "tx: %s" % txid print "assign community ids" stmt = ( " insert into facts (subject, schema_name, predicate, obj, tx) " " select f.subject, f.schema_name, 'community_id', f2.obj, %s " " from facts f join facts f2 on f.obj = f2.subject " " where f.schema_name = 'email_addr' " " and f.predicate = 'community' " " and f2.schema_name = f.schema_name and f2.predicate = 'group_id' "
#print references threadid = references.split()[0] tosize = len(toemail.split(';')) ccsize = len(ccemail.split(';')) - 1 bccsize = len(bccemail.split(';')) - 1 attachsize = len(attach.split(';')) - 1 bodysize = len(body) # ingest in to Email table EmailRow(cnx.conn()).addEmail(str(num), threadid, dir, category, utc_date, fromemail, toemail, ccemail, bccemail, subject, body, str(tosize), str(ccsize), str(bccsize), str(attachsize), attach, bodysize, "", count) outrow = zip(headers, [str(num), threadid, dir, category, utc_date, fromemail, toemail, ccemail, bccemail, subject, body, str(tosize), str(ccsize), str(bccsize), str(attachsize), attach, bodysize, "", count]) #line number fact.addFact(num, "email", "line_num", count, tx) #ingest email in to stage table for header, val in outrow: #do not bother with empty string if val: if header == "body": pass else: fact.addFact(num, "email", header, val, tx) #ingest individual to, cc, bcc into stage table for header, addrs in (("to", toemail), ("cc", ccemail), ("bcc", bccemail)): for addr in addrs.split(';'): if addr: fact.addFact(num, "email", header, addr, tx)