示例#1
0
class Store:
    """Persist numeric samples in a PostgreSQL ``stats`` table via PyGreSQL.

    Connection settings come from Clever-Cloud style POSTGRESQL_ADDON_*
    environment variables.
    """

    def __init__(self):
        self.connection = DB(dbname=environ.get('POSTGRESQL_ADDON_DB'),
                             host=environ.get('POSTGRESQL_ADDON_HOST'),
                             port=int(environ.get('POSTGRESQL_ADDON_PORT')),
                             user=environ.get('POSTGRESQL_ADDON_USER'),
                             passwd=environ.get('POSTGRESQL_ADDON_PASSWORD'))
        try:
            # Probe for the table; a ProgrammingError means it does not exist yet.
            self.connection.query("SELECT * FROM stats")
        except ProgrammingError:
            print("Create Table")
            self.connection.query("""CREATE TABLE stats (timestamp TIMESTAMP PRIMARY KEY,value NUMERIC(5,2))""")

    @staticmethod
    def _today():
        """Return today's date as 'YYYY-M-D' (unpadded, matching the historical format)."""
        now = datetime.now()
        return '{}-{}-{}'.format(now.year, now.month, now.day)

    def add(self, value):
        """Store one sample at the current time.

        Values at or below 0.1 are dropped — presumably treated as sensor
        noise; TODO confirm the threshold's intent.
        """
        if float(value) > 0.1:
            self.connection.insert('stats', timestamp=get_current_time(), value=value)

    def display(self):
        """Run a full-table select (result is discarded by callers of the original)."""
        self.connection.query("SELECT * FROM stats")

    def get_values(self, value=None):
        """Return raw rows for one calendar day.

        value: a 'YYYY-M-D' date string; defaults to *today computed at call
        time*. (The previous default was evaluated once at class-definition
        time and went stale.) The date is passed as a query parameter instead
        of being concatenated into the SQL, which also closes an injection hole.
        """
        if value is None:
            value = self._today()
        return self.connection.query(
            "SELECT extract(DOW FROM timestamp)::INT AS dow, timestamp, value"
            " FROM stats WHERE timestamp >= $1"
            " AND timestamp < ($1::DATE + '1 day'::INTERVAL) ORDER BY 2",
            (value,))

    def get_average_values(self, value=None):
        """Return per-(dow, hh:mm) averages over the whole table, with all
        timestamps re-anchored onto the given date.

        value: a 'YYYY-MM-DD' date string; defaults to today at call time.
        Parameterized ($1) rather than string-concatenated.
        """
        if value is None:
            value = self._today()
        return self.connection.query(
            "SELECT extract(DOW FROM timestamp)::INT AS dow,"
            " to_timestamp($1 || ' ' ||"
            " LPAD(extract(HOUR FROM timestamp)::TEXT,2,'0') || ':' ||"
            " LPAD(extract(MINUTE FROM timestamp)::TEXT,2,'0') ||"
            " ':00', 'YYYY-MM-DD HH24:MI:SS') AS timestamp,"
            " round(avg(value)::NUMERIC,2) AS value FROM stats"
            " GROUP BY 1,2 ORDER BY 1,2",
            (value,))
示例#2
0
class PostgresqlAdapter(object):
    """CRUD adapter for the ``inhabitants`` table, backed by PyGreSQL."""

    def __init__(self, dbname, user, passwd, host="localhost", port=5432):
        # One classic-interface connection for the adapter's lifetime.
        self._db = DB(dbname=dbname, host=host, port=port,
                      user=user, passwd=passwd)

    def create(self, inhabitant):
        """Insert a new inhabitant document and return its normalized form."""
        return _normalize(self._db.insert('inhabitants', data=inhabitant))

    def find_all(self):
        """Return every inhabitant as a list of normalized records."""
        rows = self._db.query('select * from inhabitants').dictresult()
        return list(map(_normalize, rows))

    def find_by_id(self, id):
        """Return the inhabitant with the given id (raises NoRecordFound)."""
        return _normalize(self._find_by_id(id))

    def find_by_name(self, name):
        """Case-insensitive substring match on first or last name."""
        sql = ("SELECT * FROM inhabitants "
               "WHERE data ->> 'FirstName' ILIKE '%' || $1 || '%' OR "
               "      data ->> 'LastName' ILIKE '%' || $1 || '%'")
        matches = self._db.query(sql, (name, )).dictresult()
        return list(map(_normalize, matches))

    def delete(self, id):
        """Delete by id; non-numeric ids are silently ignored."""
        try:
            self._db.delete('inhabitants', id=int(id))
        except ValueError:
            pass

    def update(self, inhabitant):
        """Replace the stored document for inhabitant['Id'] and return it."""
        record_id = inhabitant["Id"]
        self._find_by_id(record_id)  # raises NoRecordFound for unknown ids
        del inhabitant["Id"]
        return _normalize(self._db.update('inhabitants', id=record_id,
                                          data=inhabitant))

    def _find_by_id(self, id):
        # Translate both bad input and DB-level misses into the domain error.
        try:
            return self._db.get('inhabitants', int(id))
        except (ValueError, DatabaseError):
            raise NoRecordFound()
示例#3
0
#insert_testing_patient.py
import datetime
from pg import DB
"""
Feb 27 2018
This script should be run to enter some patients into the table called 'patients' in the database called 'patient_db_test'


DATE formats:
YMD - year-month-day is the most desirable format, considered ISO 8601 standards
"""

# Connect to the local test database and seed it with sample patients.
db = DB("test_patients")

# (first name, last name, PHN, date of birth) for each seed patient.
_SEED_PATIENTS = [
    ("NICHOLAS ANDREW", "MARSDEN", "9067 062 924", datetime.date(1946, 10, 6)),
    ("JOHNNY ALAN GEORGE", "LUCK", "9136 527 693", datetime.date(1941, 5, 12)),
    ("ELIZABETH M", "CARLEY", "9051 748 373", datetime.date(1939, 3, 25)),
]
for first, last, phn, born in _SEED_PATIENTS:
    db.insert('patients', first_name=first, last_name=last, phn=phn, dob=born)
     for line in input(fileName):
         if len(line) < 20:
             continue
         
         if "Session" in line:
             session = line.split()[2][8:]
             if sessionOld is -1:
                 sessionOld = session
             if session not in sessionOld:
                 sessionDatabaseID = sessionDatabaseID + 1        
                 print "sessionDatabaseID: " + str(sessionDatabaseID)
                 sessionOld = session
         
         if "NOTIFY: login attempt" in line:
             sessionStartTime = DB.query(db, "select to_timestamp('" + line[0:22] + "', 'DD-Mon-YYYY (HH24:MI:SS)')").getresult()[0][0]
             DB.insert(db, 'connections', honeypot=honeypotName, session=sessionDatabaseID, ip=ip, port=port, authenticated='authentication method: SSH_AUTH_METHOD_PASSWORD', start_time=sessionStartTime, finish_time=sessionStartTime)
             print "login attempt from ip: " + ip 
 
         if "NOTIFY: client session ended" in line:
             sessionEndTime = DB.query(db, "select to_timestamp('" + line[0:22] + "', 'DD-Mon-YYYY (HH24:MI:SS)')").getresult()[0][0]
             DB.insert(db, 'connections', honeypot=honeypotName, session=sessionDatabaseID, authenticated='session ended', start_time=sessionEndTime, finish_time=sessionEndTime)
             print "session ended" 
 
         if "NOTIFY: client IP:" in line:
             ip = line.split()[6]
             sessionStartTime = DB.query(db, "select to_timestamp('" + line[0:22] + "', 'DD-Mon-YYYY (HH24:MI:SS)')").getresult()[0][0]
             DB.insert(db, 'connections', honeypot=honeypotName, session=sessionDatabaseID, ip=ip, port=port, authenticated='new client session', start_time=sessionStartTime, finish_time=sessionStartTime)
             print "session from ip: " + ip 
             print "fileName " + fileName
 
         if "NOTIFY: user:" in line:
from pg import DB
import numpy as np
import datetime

# Import well-level readings from data files under Pingtung/Well into the
# "Well" table, but only for sites already registered in "Well_Stations".
db = DB(dbname='Pingtung', host='localhost', port=5432,
    user='******', passwd='husky')

import os
for filename in os.listdir('Pingtung/Well'):
    # Filenames encode the station id (chars 0-8) and year (chars 9-13).
    site = filename[0:8]
    year = filename[9:13]
    print(filename)
    # NOTE(review): `site` is interpolated straight into the SQL string —
    # fine for trusted filenames, but a parameterized query would be safer.
    sites = db.query('select site, starttime, endtime from "Well_Stations" where site like \'{site}\''.format(site=site)).getresult()
    if sites != []:
        for line in open('Pingtung/Well/{FN}'.format(FN=filename), 'r'):
            # Check that the timestamp fields are consistent with the filename.
            data = line.split()
            if len(data[1]) == 8:
                # NOTE(review): with `and`, a row passes when EITHER the year
                # matches OR the date is 01-01 — confirm this isn't meant to
                # be `or` (reject on any mismatch).
                if (data[0][0:4] != year) and (data[0][5:10] != '01-01'):
                    print('檔案有誤 日期不對')
                    break
            else:
                print('檔案有誤 時間不對')
                break

            db.insert('Well', site=site, date='{date} {time}'.format(date=data[0], time=data[1]), value=data[2])
    else:
        print('查無此站')


    json_object = json.loads(line)
    user_id = json_object['user_id']
    review_id = json_object['review_id']
    business_id = json_object['business_id']
    stars = json_object['stars']
    date = json_object['date']
    text = json_object['text']
    useful = json_object['useful']
    funny = json_object['funny']
    cool = json_object['cool']

    db.insert('review',
              user_id=user_id,
              review_id=review_id,
              business_id=business_id,
              stars=stars,
              date=date,
              text=text,
              useful=useful,
              funny=funny,
              cool=cool)

    i = i + 1
    if (i % 10000) == 0:
        print(str(int(i / 10000) * 10000) + ' lines processed...')

    line = f.readline()

f.close()
print('finished processing...')

f = open('yelp_academic_dataset_tip.json')
示例#7
0
# Mirror the VPXV_STAT_COUNTERS view from vCenter's SQL Server database into
# the Postgres ``stat_counters`` table, one row at a time.
MS = DBManage.DBManage(host="192.168.1.200",
                       port="1433",
                       user="******",
                       password="******",
                       database="VIM_VCDB",
                       charset="UTF-8")
PG = DB(dbname='VM', host='localhost', port=5432, user='******', passwd='123456')
MS.getMSSConnetion()

sql1 = "SELECT [ID] ,[STAT_ROLLUP] ,[NAME]  ,[GROUP_NAME]  ,[TYPE]  ,[UNIT]  ,[ASSOCIATE_IDS]   ,[STATS_LEVEL]    ,[FIXED_COLLECTION_INTERVAL]  FROM [VIM_VCDB].[dbo].[VPXV_STAT_COUNTERS]"

cursor1 = MS.QuerySql(sql1)
# Stream rows out of the MSSQL cursor until fetchone() is exhausted.
row = cursor1.fetchone()
while row:
    PG.insert('stat_counters',
              id=row[0],
              stat_rollup=row[1],
              name=row[2],
              group_name=row[3],
              type=row[4],
              unit=row[5],
              associate_ids=row[6],
              stats_level=row[7],
              fixed_collection_interval=row[8])
    row = cursor1.fetchone()

print("成功!")
MS.closeConn()
PG.close()
#!/usr/bin/env python

import sys
import yaml
import json
from pg import DB

# Usage: script.py <bosh-manifest.yml> <settings.json>
bosh_manifest_path = sys.argv[1]
with open(bosh_manifest_path, 'r') as f:
    # safe_load: the manifest is plain data. Bare yaml.load() can instantiate
    # arbitrary Python objects from tagged nodes and is deprecated without an
    # explicit Loader.
    bosh_manifest = yaml.safe_load(f.read())

settings_path = sys.argv[2]
with open(settings_path, 'r') as f:
    cf_ip = json.loads(f.read())['cf-ip']

# Postgres credentials live under the first job's properties in the manifest.
postgres_properties = bosh_manifest['jobs'][0]['properties']['postgres']
dbname = postgres_properties.get('database')
host = postgres_properties.get('host')
port = postgres_properties.get('port', 5432)
user = postgres_properties.get('user')
passwd = postgres_properties.get('password')

db = DB(dbname=dbname, host=host, port=port, user=user, passwd=passwd)

# Seed a PowerDNS-style zone: one SOA, one NS, and a wildcard A record that
# points *.<cf-ip>.xip.io at the CF router IP.
domain_id = db.insert('domains', name='xip.io', type='NATIVE')['id']
db.insert('records', domain_id=domain_id, name='{0}.xip.io'.format(cf_ip), content='localhost [email protected] 1', type='SOA', ttl=86400, prio=None)
db.insert('records', domain_id=domain_id, name='{0}.xip.io'.format(cf_ip), content='dns-us1.powerdns.net', type='NS', ttl=86400, prio=None)
db.insert('records', domain_id=domain_id, name='*.{0}.xip.io'.format(cf_ip), content=cf_ip, type='A', ttl=120, prio=None)

db.close()
示例#9
0
class BotDB:
    """Persistence layer for the bot, backed by PostgreSQL via PyGreSQL.

    All lookup methods return a dict (or list of dicts), or False when no
    row matches. Queries are parameterized ($1, $2, ...) instead of built
    with %-interpolation — the old code was vulnerable to SQL injection
    (e.g. a thesis body containing a quote broke or subverted the query).
    """

    def __init__(self, db_url):
        # db_url is a Heroku-style postgres:// URL.
        urlparse.uses_netloc.append("postgres")
        self.__db_url = db_url
        url = urlparse.urlparse(db_url)
        self.__db = DB(
            dbname=url.path[1:],
            user=url.username,
            passwd=url.password,
            host=url.hostname,
            port=url.port
        )

    def insertThesis(self, init_id, chat_id, user_id, body):
        ts = time.time()
        timestamp = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
        print("inserting thesis")
        print(init_id, chat_id, user_id, body, timestamp)
        # NOTE(review): `timestamp` is computed but not stored; creation_time
        # is presumably filled by a column default — confirm the schema.
        self.__db.insert("theses", row={"init_id": init_id, "chat_id": chat_id, "user_id": user_id, "body": body})
        print("done")
        self.__db.commit()

    def getThesisByIds(self, init_id, chat_id):
        """Return the thesis row for (init_id, chat_id), or False if absent."""
        query = self.__db.query(
            "SELECT * FROM theses WHERE init_id = $1 AND chat_id = $2;",
            (init_id, chat_id))
        dict_res = query.dictresult()
        if len(dict_res) == 0:
            return False
        else:
            return dict_res[0]

    def getThesisByBody(self, body):
        """Return the first thesis whose body matches exactly, or False."""
        query = self.__db.query(
            "SELECT * FROM theses WHERE body = $1;", (body,))
        dict_res = query.dictresult()
        if len(dict_res) == 0:
            return False
        else:
            return dict_res[0]

    def getLastThesesByTime(self, chat_id, interval):
        """Return theses for chat_id newer than now - interval, or False.

        interval is a PostgreSQL interval string such as '2 hours'.
        """
        query = self.__db.query(
            "SELECT * FROM theses WHERE chat_id = $1 AND creation_time > current_timestamp - $2::interval;",
            (chat_id, interval))
        dict_res = query.dictresult()
        if len(dict_res) == 0:
            return False
        else:
            return dict_res

    def getTodayTheses(self, chat_id):
        """Return theses created since midnight for chat_id, or False."""
        query = self.__db.query(
            "SELECT * FROM theses WHERE chat_id = $1 AND creation_time > current_date;",
            (chat_id,))
        dict_res = query.dictresult()
        if len(dict_res) == 0:
            return False
        else:
            return dict_res

    def insertUser(self, user_id, username, first_name, last_name):
        """Insert a user row; optional fields are stored only when truthy."""
        row = {"user_id": user_id}
        if username:
            row["username"] = username
        if first_name:
            row["first_name"] = first_name
        if last_name:
            row["last_name"] = last_name
        self.__db.insert('users', row=row)
        self.__db.commit()

    def getUserById(self, user_id):
        """Return the user row for user_id, or False if absent."""
        query = self.__db.query(
            "SELECT * FROM users WHERE user_id = $1;", (user_id,))
        dict_res = query.dictresult()
        if len(dict_res) == 0:
            return False
        else:
            return dict_res[0]

    def insertBotMessage(self, chat_id, message_id, owner_id):
        """Record a message the bot sent, keyed by chat and message id."""
        row = {"chat_id": chat_id, "message_id": message_id, "owner_id": owner_id}
        self.__db.insert('bot_messages', row=row)
        self.__db.commit()

    def getBotMessage(self, chat_id, message_id):
        """Return the recorded bot message, or False if absent."""
        query = self.__db.query(
            "SELECT * FROM bot_messages WHERE chat_id = $1 AND message_id = $2;",
            (chat_id, message_id))
        dict_res = query.dictresult()
        if len(dict_res) == 0:
            return False
        else:
            return dict_res[0]

    def close(self):
        """Close the underlying database connection."""
        self.__db.close()
示例#10
0
    bosh_manifest = yaml.load(f.read())

settings_path = sys.argv[2]
with open(settings_path, 'r') as f:
    cf_ip = json.loads(f.read())['cf-ip']

postgres_properties = bosh_manifest['jobs'][0]['properties']['postgres']
dbname = postgres_properties.get('database')
host = postgres_properties.get('host')
port = postgres_properties.get('port', 5432)
user = postgres_properties.get('user')
passwd = postgres_properties.get('password')

db = DB(dbname=dbname, host=host, port=port, user=user, passwd=passwd)

domain_id = db.insert('domains', name='xip.io', type='NATIVE')['id']
db.insert('records',
          domain_id=domain_id,
          name='{0}.xip.io'.format(cf_ip),
          content='localhost [email protected] 1',
          type='SOA',
          ttl=86400,
          prio=None)
db.insert('records',
          domain_id=domain_id,
          name='{0}.xip.io'.format(cf_ip),
          content='dns-us1.powerdns.net',
          type='NS',
          ttl=86400,
          prio=None)
db.insert('records',
示例#11
0
class PGSQL_DB(DB_Abstract):
    """PostgreSQL-backed store for users and tweets (PyGreSQL classic API).

    Error protocol: mutating methods return HTTP-like status codes
    (200 success / 400 failure); readers return None on failure.
    ``self.conn is None`` marks an unusable backend.
    """

    # Connection handle; None whenever connecting or table creation failed.
    conn = None

    def __init__(self, dbname, dbhost, dbport, dbuser, dbpass):
        """Connect and create the ``users``/``tweets`` tables if missing."""
        try:
            self.conn = DB(dbname=dbname,
                           host=dbhost,
                           port=dbport,
                           user=dbuser,
                           passwd=dbpass)
        except Exception as ex:
            print(ex)
            self.conn = None

        if (None != self.conn):
            # get_tables() reports schema-qualified names, hence "public.".
            tables = self.conn.get_tables()
            if (not ("public.users" in tables)):
                try:
                    self.conn.query(
                        "CREATE TABLE users(uid bigint primary key, name text, bio text)"
                    )
                except Exception as ex:
                    print(ex)
                    self.conn = None
            if (not ("public.tweets" in tables)):
                try:
                    self.conn.query(
                        "CREATE TABLE tweets(tid bigint primary key, author_id bigint, parent_id bigint, timestamp bigint, text text)"
                    )
                except Exception as ex:
                    print(ex)
                    self.conn = None

    def insert_user(self, user):
        """Insert a User; returns 200 on success, 400 on any failure."""
        if (not (type(user) is User)):
            print("type isn't user")
            return 400

        if ("" == user.user_id or "" == user.name):
            print("empty user")
            return 400

        if (None == self.conn):
            print("no connection")
            return 400

        try:
            self.conn.insert('users', {
                'uid': user.user_id,
                'name': user.name,
                'bio': user.bio
            })
        except Exception as ex:
            print(ex)
            return 400

        return 200

    def insert_tweet(self, tweet):
        """Insert a Tweet; returns 200 on success, 400 on any failure."""
        if (not (type(tweet) is Tweet)):
            print("type isn't Tweet")
            return 400

        if ("" == tweet.tweet_id or "" == tweet.author_id
                or "" == tweet.parent_id or "" == tweet.timestamp
                or "" == tweet.text):
            print("empty tweet")
            return 400

        if (None == self.conn):
            print("no connection")
            return 400

        try:
            self.conn.insert(
                'tweets', {
                    'tid': tweet.tweet_id,
                    'author_id': tweet.author_id,
                    'parent_id': tweet.parent_id,
                    'timestamp': tweet.timestamp,
                    'text': tweet.text
                })
        except Exception as ex:
            print(ex)
            return 400

        return 200

    def get_user_by_id(self, user_id):
        """Return the User with the given id, or None on any failure."""
        if ("" == user_id):
            print("empty user_id")
            return None

        if (None == self.conn):
            print("no connection")
            return None

        try:
            u = self.conn.get('users', {'uid': user_id})
        except Exception as ex:
            print(ex)
            return None

        user = User()
        user.user_id = u['uid']
        user.name = u['name']
        user.bio = u['bio']

        return user

    def get_user_tweets(self, uid):
        """Return the list of Tweets authored by uid, or None if none/failed.

        NOTE(review): `uid` is formatted straight into the WHERE clause —
        SQL injection if uid is ever untrusted; confirm callers sanitize it.
        `get_as_list` is presumably a DB_Abstract helper returning indexable
        rows — confirm its contract.
        """
        if ("" == uid):
            print("empty uid")
            return None
        if (None == self.conn):
            print("no connection")
            return None

        try:
            t = self.conn.get_as_list('tweets',
                                      where="author_id = {0}".format(uid))
        except Exception as ex:
            print(ex)
            return None

        tweets = []

        # Rows are positional: tid, author_id, parent_id, timestamp, text.
        for tweet in t:
            new_tweet = Tweet()
            new_tweet.tweet_id = tweet[0]
            new_tweet.author_id = tweet[1]
            new_tweet.parent_id = tweet[2]
            new_tweet.timestamp = tweet[3]
            new_tweet.text = tweet[4]
            tweets.append(new_tweet)

        if (0 == len(tweets)):
            return None

        return tweets

    def get_tweet_by_id(self, tweet_id):
        """Return the Tweet with the given id, or None on any failure."""
        if (None == self.conn):
            print("no connection")
            return None

        try:
            t = self.conn.get('tweets', {'tid': tweet_id})
        except Exception as ex:
            print(ex)
            return None

        tweet = Tweet()
        tweet.tweet_id = t['tid']
        tweet.author_id = t['author_id']
        tweet.parent_id = t['parent_id']
        tweet.timestamp = t['timestamp']
        tweet.text = t['text']

        return tweet

    def get_replies_by_id(self, tweet_id):
        """Return replies to tweet_id (excluding self-replies), or None.

        NOTE(review): same string-built WHERE clause injection concern as
        get_user_tweets.
        """
        if (None == self.conn):
            print("no connection")
            return None

        try:
            t = self.conn.get_as_list('tweets',
                                      where="parent_id = " + tweet_id)
        except Exception as ex:
            print(ex)
            return None

        tweets = []
        for tweet in t:
            # Skip rows that reply to themselves (tid == parent_id).
            if (tweet[0] != tweet[2]):
                new_tweet = Tweet()
                new_tweet.tweet_id = tweet[0]
                new_tweet.author_id = tweet[1]
                new_tweet.parent_id = tweet[2]
                new_tweet.timestamp = tweet[3]
                new_tweet.text = tweet[4]
                tweets.append(new_tweet)

        if (0 == len(tweets)):
            return None

        return tweets
示例#12
0
from pg import DB

# Join daily stats with their counter metadata and mirror the result into
# the ``vm_state`` table.
PG = DB(dbname='VM', host='192.168.1.112', port=5432, user='******', passwd='123456')

sql = "select A.sample_time,A.stat_name,B.stat_rollup,B.unit,A.stat_group,A.entity,A.stat_value from hist_stat_daily A ,stat_counters B where A.stat_id=B.id "
for sample_time, stat_name, rollup, unit, group, entity, value in PG.query(sql).getresult():
    PG.insert("vm_state",
              time=sample_time,
              stat_name=stat_name,
              stat_rollup_type=rollup,
              unit=unit,
              stat_group=group,
              entity=entity,
              stat_value=value)
print("成功")
PG.close()
示例#13
0
'''
import xml.etree.ElementTree as ET
from pg import DB


if __name__ == '__main__':
    # Stream-parse the Russian Wikipedia dump and load each <page> into
    # the ``wiki`` table.
    file = open("ruwiki.xml", encoding="utf-8")
    tree = ET.iterparse(file)

    db = DB(host="localhost", user="******", passwd="1234", dbname="wiki")
    db.query("TRUNCATE TABLE wiki")

    for i, line in enumerate(tree):
        event, element = line
        if element.tag == "page":
            pageid = element.find("id").text
            title = element.find("title").text
            timestamp = element.find("revision").find("timestamp").text.replace("T", " ")
            username = element.find("revision").find("contributor").find("username")
            if username is not None:
                author = username.text
            else:
                # Anonymous edits carry the contributor's IP instead of a username.
                author = element.find("revision").find("contributor").find("ip").text
            text = element.find("revision").find("text").text

            db.insert("wiki", id=pageid, title=title, timestamp=timestamp, author=author, text=text)

            # Free the processed subtree: iterparse otherwise keeps the whole
            # document in memory, which is fatal on a multi-GB dump.
            element.clear()

            if i % 10**4 == 0:
                # 14.8M pages assumed for the progress estimate.
                print(round(i/10**6 / 14.8 * 100, 2), "%", pageid, title, timestamp, author)

    print(db.query("SELECT id, title, timestamp, author FROM wiki"))
示例#14
0
# Seed lookup tables. NOTE: `db` is created earlier in the file (not shown
# in this chunk).
print ("Table City - multiple rows at once")
# inserttable takes rows positionally: (id, name) pairs built by enumerate.
cities = 'Seattle Spokane Tacoma Vancouver'.split()
data = list(enumerate(cities, start=1))
db.inserttable('city', data)


print ("Table Feature - multiple rows at once")
features = 'Library Park Museum Theatre'.split()
data = list(enumerate(features, start=1))
db.inserttable('feature', data)

print ("Table cityfeature - single row at once")


db.insert('cityfeatures',featureid=1,   cityid=1)
db.insert('cityfeatures',featureid=2,   cityid=1)
db.insert('cityfeatures',featureid=3,   cityid=1)
db.insert('cityfeatures',featureid=4,   cityid=1)

db.insert('cityfeatures',featureid=3,   cityid=2)
db.insert('cityfeatures',featureid=2,   cityid=3)
db.insert('cityfeatures',featureid=4,   cityid=4)


print ("Table location - single row at once")


# NOTE(review): the lng/lat values look swapped — 47.6 is Seattle's latitude
# and -122.33 its longitude, but they are passed as lng=47.6/lat=-122.33.
# Confirm against the table schema before relying on these columns.
db.insert('location', locationid =1,   featureid =1,    cityid=1 ,    commonname='Dorothy Stimson Bullitt Library',    address='100 University St',    website ='seattleartmuseum.org',    lng=47.6057,    lat=-122.3371 )
db.insert('location', locationid =2,   featureid =1,    cityid=1 ,    commonname='Region 10 Library',    address='1200 6th Ave #900',    website ='epa.gov',    lng=47.6090,    lat=-122.3320 )
示例#15
0
    db = DB(host="localhost", user="******", passwd="1234", dbname="wiki")
    db.query("TRUNCATE TABLE wiki")

    for i, line in enumerate(tree):
        event, element = line
        if element.tag == "page":
            pageid = element.find("id").text
            title = element.find("title").text
            timestamp = element.find("revision").find(
                "timestamp").text.replace("T", " ")
            username = element.find("revision").find("contributor").find(
                "username")
            if not username is None:
                author = username.text
            else:
                author = element.find("revision").find("contributor").find(
                    "ip").text
            text = element.find("revision").find("text").text

            db.insert("wiki",
                      id=pageid,
                      title=title,
                      timestamp=timestamp,
                      author=author,
                      text=text)

            if i % 10**4 == 0:
                print(round(i / 10**6 / 14.8 * 100, 2), "%", pageid, title,
                      timestamp, author)

    print(db.query("SELECT id, title, timestamp, author FROM wiki"))
示例#16
0
文件: MStoPG.py 项目: allendu91/VMDB
# Fetch compute-resource statistics: copy vCenter's daily history stats
# (joined with their counter metadata) from SQL Server into Postgres.
from pg import DB
import  DBManage

MS=DBManage.DBManage(host="192.168.1.200",port="1433",user="******",password="******",database="VIM_VCDB",charset="UTF-8")
PG = DB(dbname='VM', host='192.168.1.112',port=5432,user='******',passwd='123456')
MS.getMSSConnetion()

#sql1="SELECT [SAMPLE_TIME] ,[STAT_ID]   ,[STAT_NAME]  ,[STAT_GROUP]  ,[STAT_ROLLUP_TYPE]  ,[COUNTER_ID] ,[ENTITY] ,[DEVICE_NAME] ,[DEVICE_TYPE_NAME]   ,[STAT_VALUE] FROM [VIM_VCDB].[dbo].[VPXV_HIST_STAT_DAILY]"
sql="select A.[SAMPLE_TIME],A.[STAT_NAME],B.[STAT_ROLLUP],B.[UNIT],A.[STAT_GROUP],A.[ENTITY],A.[STAT_VALUE] from [VIM_VCDB].[dbo].[VPXV_HIST_STAT_DAILY] A ,[VIM_VCDB].[dbo].[VPXV_STAT_COUNTERS] B where A.[STAT_ID]=B.[ID] "

cursor1=MS.QuerySql(sql)
# Stream the MSSQL cursor row by row; fetchone() returns a falsy value at end.
Tuple=cursor1.fetchone()
while Tuple:
    PG.insert("vm_state", time=Tuple[0], stat_name=Tuple[1], stat_rollup_type=Tuple[2], unit=Tuple[3], stat_group=Tuple[4],entity=Tuple[5], stat_value=Tuple[6])
    #PG.insert('hist_stat_daily',sample_time=Tuple[0],stat_id=Tuple[1],stat_name=Tuple[2],stat_group=Tuple[3],stat_rollup_type=Tuple[4],counter_id=Tuple[5],entity=Tuple[6],device_name=Tuple[7],device_type_name=Tuple[8],stat_value=Tuple[9])
    Tuple=cursor1.fetchone()

print("成功!")
MS.closeConn()
PG.close()
示例#17
0
from pg import DB
import  DBManage

# Copy the VPXV_STAT_COUNTERS view (counter metadata) from vCenter's SQL
# Server database into the Postgres ``stat_counters`` table.
MS=DBManage.DBManage(host="192.168.1.200",port="1433",user="******",password="******",database="VIM_VCDB",charset="UTF-8")
PG = DB(dbname='VM', host='localhost' ,port=5432,user='******',passwd='123456')
MS.getMSSConnetion()

sql1="SELECT [ID] ,[STAT_ROLLUP] ,[NAME]  ,[GROUP_NAME]  ,[TYPE]  ,[UNIT]  ,[ASSOCIATE_IDS]   ,[STATS_LEVEL]    ,[FIXED_COLLECTION_INTERVAL]  FROM [VIM_VCDB].[dbo].[VPXV_STAT_COUNTERS]"


cursor1=MS.QuerySql(sql1)
# Stream the MSSQL cursor row by row; fetchone() returns a falsy value at end.
Tuple=cursor1.fetchone()
while Tuple:
    PG.insert('stat_counters',id=Tuple[0],stat_rollup=Tuple[1],name=Tuple[2],group_name=Tuple[3],type=Tuple[4],unit=Tuple[5],associate_ids=Tuple[6],stats_level=Tuple[7],fixed_collection_interval=Tuple[8])
    Tuple=cursor1.fetchone()

print("成功!")
MS.closeConn()
PG.close()
示例#18
0
class data_utils(object):
    """docstring for db_utils"""
    def __init__(self, arg):
        """Open a connection to the local structnet_complete PostgreSQL DB.

        arg: dict of runtime settings (file paths and table names such as
        'data_file', 'prediction_table', 'entity_table', ...) consumed by the
        insert/query helpers; stored as-is on the instance.
        """
        #super(db_utils, self).__init__()
        self.arg = arg
        self.db = DB(dbname='structnet_complete',
                     user='******',
                     passwd='structnet',
                     host='localhost')
        #self.identity = ''.join(random.choice(string.ascii_lowercase) for _ in range(8))
        #print self.identity
        print "Conncted!"
        # Wall-clock reference used for the timing logs in
        # query_links_by_categories.
        self.start_time = time.time()

    def load_pmids(self):
        f = open(self.arg['corpus_map'], 'rb')
        self.pmid_dict = marshal.load(f)
        f.close()
        print "PMID loaded!"

    def insert_prediction(self):
        cnt = 0
        # 527.8M lines
        with open(self.arg['data_file'], 'r') as IN:
            for line in IN:
                #try:
                if cnt % 50000 == 0:
                    print self.arg['data_file'], "process ", cnt, " lines"
                cnt += 1
                tmp = json.loads(line)
                self.db.insert(self.arg['prediction_table'],
                               entity_a=tmp['em1Text'],
                               entity_b=tmp['em2Text'],
                               relation_type=tmp['label'],
                               score=tmp['score'])

    def insert_query(self):
        with open(self.arg['data_file']) as IN:
            line_num = 0
            for line in IN:
                print line_num
                tmp = line.split('\t')
                target_type = ast.literal_eval(tmp[0])[0]
                if '[' in tmp[3]:
                    output_types = ast.literal_eval(tmp[3])
                else:
                    output_types = [tmp[3]]
                relation_type = tmp[2]
                self.db.insert(self.arg['query_table'],
                               target_type=target_type,
                               output_types=output_types,
                               index=line_num,
                               relation_type=relation_type)
                line_num += 1

    def insert_caseolap(self, index_number):
        """Load one CaseOLAP score file into arg['caseolap_table'].

        Reads <data_file><index_number>_c.txt; each line holds a sub_type and
        a bracketed "entity|score, ..." list.  index_number is stored as the
        row's doc_id.
        """
        path = self.arg['data_file'] + str(index_number) + '_c.txt'
        with open(path) as source:
            for raw in source:
                cols = raw.split('\t')
                entries = cols[1].strip().lstrip('[').rstrip(']').split(',')
                for entry in entries:
                    pair = entry.strip().split('|')
                    # Skip anything that is not exactly "entity|score".
                    if len(pair) != 2:
                        continue
                    self.db.insert(self.arg['caseolap_table'],
                                   doc_id=index_number,
                                   sub_type=cols[0],
                                   entity=pair[0],
                                   score=float(pair[1]))

    def query_prediction(self, name_a, name_b, relation_type):
        """Return the predicted score for (name_a, relation_type, name_b),
        or 0 when no prediction row exists.

        Entity names originate from user queries, so they are passed as
        bound parameters ($1..$3) instead of being concatenated into the SQL
        text, closing the SQL-injection hole of the original.  The table
        name is operator-controlled configuration and stays interpolated.
        """
        # The prediction table is large; disable the statement timeout.
        self.db.query("set statement_timeout TO 0")
        q = self.db.query(
            "SELECT score FROM " + self.arg['prediction_table'] +
            " WHERE entity_a=$1 AND entity_b=$2 AND relation_type=$3",
            (name_a, name_b, relation_type))
        rows = q.dictresult()  # fetch once; the original materialised it twice
        if len(rows) == 0:
            return 0
        return rows[0]['score']

    def query_prediction_v2(self, name_a, name_b, relation_type):
        """Like query_prediction, but also fetch one example sentence where
        name_a and name_b co-occur.

        Returns a dict with 'article_title'/'pmid'/'sent' from the example
        (empty when none found in time) plus 'score' (0 when no prediction
        row exists).
        NOTE(review): both queries are string-concatenated from user-supplied
        entity names -- SQL-injection risk; should use bound parameters.
        """
        query_string = "SELECT score FROM "+self.arg['prediction_table']+" WHERE entity_a=\'" + name_a +"\' AND entity_b=\'" + \
        name_b + "\' AND relation_type=\'" + relation_type + "\'"

        # Self-join entity_table on sent_id to find a single sentence that
        # mentions both entities.
        query_ems = "SELECT L.article_title, L.pmid, L.sent FROM entity_table as L INNER JOIN (select sent_id FROM entity_table where entity_name=\'"+name_a+\
         "\') AS R ON L.sent_id=R.sent_id where entity_name=\'"+name_b+"\' LIMIT 1"
        #set_a = set(map(lambda x:x['sent_id'], self.db.query(query_em_a).dictresult()))
        ##query_em_b = "SELECT distinct sent_id FROM entity_table WHERE entity_name=\'"+name_b+"\'"
        #set_b = set(map(lambda x:x['sent_id'], self.db.query(query_em_b).dictresult()))
        #print set_a.intersection(set_b)
        # Cap the (expensive) co-occurrence search at 100ms; on timeout or
        # any other failure fall back to an empty example dict.
        self.db.query("set statement_timeout TO 100")
        try:
            result = self.db.query(query_ems).dictresult()[0]
        except:
            result = {}
        q = self.db.query(query_string)
        if len(q.dictresult()) == 0:
            result['score'] = 0
        else:
            result['score'] = q.dictresult()[0]['score']
        return result
        #print query_string

    def query_distinctive(self,
                          target_type,
                          output_types,
                          relation_type,
                          sub_types,
                          num_records=8):
        """Return CaseOLAP rows per sub_type for one stored query.

        Looks up the query's index in query_table via (target_type,
        output_types, relation_type); that index doubles as the CaseOLAP
        doc_id.  sub_types arrives as a python-literal list string.  Returns
        a list of dictresult() lists (entity/score), one per sub_type, each
        capped at num_records rows.
        NOTE(review): SQL is string-concatenated from caller input --
        injection risk; ORDER BY score is ascending -- confirm intended.
        """
        self.db.query("set statement_timeout TO 0")
        sub_types = ast.literal_eval(sub_types)
        result = []
        query_string = "SELECT index FROM query_table WHERE target_type @@ \'" + target_type + "\' AND output_types@>\'" +\
        output_types+"\' and relation_type=\'" + relation_type +"\'"
        #print query_string
        idx = self.db.query(query_string).dictresult()[0]['index']
        for sub_type in sub_types:
            query_string = "SELECT entity,score FROM " + self.arg['caseolap_table'] + " WHERE doc_id=" + str(idx) + \
            " AND sub_type @@ \'" + sub_type + "\' ORDER BY score LIMIT " + str(num_records)
            q = self.db.query(query_string)
            result.append(q.dictresult())
        return result

    def query_distinctive_v2(self,
                             target_type,
                             output_types,
                             relation_type,
                             sub_types,
                             num_records=8):
        """Like query_distinctive, but also resolve one supporting pmid per
        returned entity via relation_table/entity_table.

        Returns (result, pmid_result): parallel lists, one element per
        sub_type -- the CaseOLAP rows and [{pmid, entity}, ...] provenance.
        NOTE(review): type_a_name/type_b_name are computed but only used by
        the commented-out query variant below (dead code).
        """
        self.db.query("set statement_timeout TO 0")
        sub_types = ast.literal_eval(sub_types)
        result = []
        pmid_result = []
        query_string = "SELECT index FROM query_table WHERE target_type @@ \'" + target_type + "\' AND output_types@>\'" +\
        output_types+"\' and relation_type=\'" + relation_type +"\'"
        #print query_string
        if 'MeSH' in target_type:
            type_b_name = 'type_b_mesh'
        else:
            type_b_name = 'type_b_umls'

        if 'MeSH' in output_types:
            type_a_name = 'type_a_mesh'
        else:
            type_a_name = 'type_a_umls'
        idx = self.db.query(query_string).dictresult()[0]['index']
        for sub_type in sub_types:
            query_string = "SELECT entity,score FROM " + self.arg['caseolap_table'] + " WHERE doc_id=" + str(idx) + \
            " AND sub_type @@ \'" + sub_type + "\' ORDER BY score LIMIT " + str(num_records)
            q = self.db.query(query_string)

            type_target = sub_type.split('::')[0]
            #print type_target
            #entity_list = q.dictresult()
            pmids = []
            # For each entity: pick one sent_id from relation_table, then map
            # it to a pmid via entity_table (LIMIT 1 each -- first match wins).
            for em in q.dictresult():
                qq = "select pmid from entity_table where sent_id = (select sent_id FROM relation_table WHERE entity_a=\'" +em['entity'] +\
                "\' AND relation_type = '"+relation_type+"' LIMIT 1) LIMIT 1"
                #"\' AND relation_type = '"+relation_type+"' AND "+type_b_name+"@>'{"+type_target+"}' LIMIT 1"
                em_result = self.db.query(qq).dictresult()
                if len(em_result) > 0:
                    em_result[0]['entity'] = em['entity']
                    pmids.append(em_result[0])
            result.append(q.dictresult())
            pmid_result.append(pmids)
        return result, pmid_result

    def insert_record(self):
        """Populate entity_table and relation_table from the JSON-lines
        corpus in arg['data_file'].

        Per sentence: classify each entity mention's labels into MeSH / GO /
        UMLS buckets, insert one entity row per mention, then one relation
        row per relation mention carrying the per-endpoint type info.
        Requires load_pmids() to have run first (uses self.pmid_dict).
        """
        cnt = 0
        # 527.8M lines
        with open(self.arg['data_file'], 'r') as IN:
            for line in IN:
                #try:
                # Progress heartbeat every 5k lines.
                if cnt % 5000 == 0:
                    print self.arg['data_file'], "process ", cnt, " lines"
                cnt += 1
                tmp = json.loads(line)
                type_dict = {}
                for e in tmp['entityMentions']:
                    aid = str(tmp['articleId'])
                    mesh = []
                    umls = []
                    go = []
                    sub_type = defaultdict(list)
                    # Labels are namespaced with ":::"; strip the prefix and
                    # bucket by vocabulary (MeSH checked before GO).
                    for l in e['label']:
                        if 'MeSH' in l:
                            mesh.append(l.replace('MeSH:::', ''))
                        elif 'GO' in l:
                            go.append(l.replace('GO:::', ''))
                        else:
                            umls.append(l)
                    if len(go) > 0:
                        sub_type['go'] = True
                    else:
                        sub_type['go'] = False
                    if len(mesh) > 0:
                        sub_type['mesh'] = mesh[0].split('::')
                    sub_type['umls'] = umls
                    # Keyed by start offset so relation mentions can look up
                    # the types of their endpoints below.
                    type_dict[e['start']] = sub_type

                    # Sanity check on pmid_dict entry length; [0] is used as
                    # the pmid and [1] as the article title.
                    assert (len(self.pmid_dict[aid]) < 9)
                    self.db.insert(self.arg['entity_table'],
                                   entity_name=e['text'],
                                   sent_id=tmp['sentId'],
                                   article_id=tmp['articleId'],
                                   pmid=self.pmid_dict[aid][0],
                                   article_title=self.pmid_dict[aid][1],
                                   sent=tmp['sentText'])
                for r in tmp['relationMentions']:
                    # sub_type is a defaultdict(list), so a mention with no
                    # MeSH labels yields [] for 'mesh' here.
                    self.db.insert(
                        self.arg['relation_table'],
                        entity_a=r['em1Text'],
                        entity_b=r['em2Text'],
                        relation_type=r['label'],
                        a_is_gene=type_dict[r['em1Start']]['go'],
                        b_is_gene=type_dict[r['em2Start']]['go'],
                        type_a_umls=type_dict[r['em1Start']]['umls'],
                        type_b_umls=type_dict[r['em2Start']]['umls'],
                        type_a_mesh=type_dict[r['em1Start']]['mesh'],
                        type_b_mesh=type_dict[r['em2Start']]['mesh'],
                        sent_id=tmp['sentId'],
                        article_id=tmp['articleId'])
                #except:
                #print cnt

    def generate_random_walks(self, edge_pairs, num_walks=3, num_steps=3):
        """Sample a connected sub-graph from edge_pairs by bounded expansion.

        edge_pairs: dicts with 'entity_a'/'entity_b'.  Starting from the node
        with the most outgoing edges, alternately expand along outgoing (even
        steps) and ingoing (odd steps) edges for num_steps rounds, keeping at
        most num_walks random neighbours per node.  Returns the set of
        (src, dst) edges visited.
        NOTE(review): relies on Python 2's eager map() for its side effects,
        and asserts that every popped node has neighbours in the current
        direction -- confirm the input graph guarantees this.
        """
        print len(edge_pairs)
        outgoing_edges = defaultdict(list)
        ingoing_edges = defaultdict(list)
        # Build both adjacency lists (map used purely for side effects).
        map(lambda x: outgoing_edges[x['entity_a']].append(x['entity_b']),
            edge_pairs)
        map(lambda x: ingoing_edges[x['entity_b']].append(x['entity_a']),
            edge_pairs)
        # Append the source node itself as a sentinel last element; it is
        # sliced off again ([:-1]) when the list is expanded below.
        for k, v in outgoing_edges.iteritems():
            if len(v) > 0:
                v.append(k)
        #print outgoing_edges
        #print ingoing_edges
        #print outgoing_edges
        # Start the walk from the highest out-degree node (sentinel = origin).
        temp = max(outgoing_edges.values(), key=len)
        origin = temp[-1]
        walk_nodes = [origin]
        total_nodes = set([origin])
        edges = set()
        for i in xrange(num_steps):
            #print "++++++++++++++++++++",i
            next_step = []
            while len(walk_nodes) > 0:
                node = walk_nodes.pop()
                if i % 2 == 0:
                    node_list = outgoing_edges[node][:-1]
                    assert (len(node_list) > 0)
                else:
                    node_list = ingoing_edges[node]
                    assert (len(node_list) > 0)
                #print node
                # Take all neighbours when few, else a random sample of
                # num_walks of them; record the traversed edges either way.
                if len(node_list) <= num_walks:
                    next_step.extend(node_list)
                    edges = edges.union(
                        list(map(lambda x: (node, x), node_list)))
                else:
                    #print "random_sample",node_list
                    random_nodes = random.sample(node_list, num_walks)
                    next_step.extend(random_nodes)
                    edges = edges.union(
                        list(map(lambda x: (node, x), random_nodes)))
            assert (len(walk_nodes) == 0)
            walk_nodes = list(set(next_step))
            total_nodes = total_nodes.union(set(next_step))
        #print total_nodes
        print len(edges)
        return edges
        #print edges

    def query_links_with_walk(self,
                              type_a,
                              type_b,
                              relation_type,
                              num_edges=5,
                              num_pps=1):
        """Sample num_edges random typed relation pairs, random-walk the
        induced graph, and return it as {'node_a', 'node_b', 'edges'}.

        type_a/type_b may arrive as python-literal strings of the form
        {'name': ..., 'type': ...}; already-parsed dicts pass through the
        bare except unchanged.  Node values are left as empty lists here.
        NOTE(review): SQL is string-concatenated from caller input --
        injection risk.
        """
        try:
            type_a = ast.literal_eval(type_a)
            type_b = ast.literal_eval(type_b)
        except:
            pass
        #print num_edges
        #query_string = "SELECT * FROM (SELECT DISTINCT ON(entity_a,entity_b) entity_a,entity_b,sent_id  "+"FROM "+self.arg['relation_table']+" WHERE type_a_"+type_a['name']+"@>'"\
        #+type_a['type']+"' AND type_b_"+type_b['name']+"@>'"+ type_b['type']+"' AND relation_type='"+relation_type + "') x ORDER BY RANDOM() LIMIT 500"
        query_string = "SELECT * FROM (SELECT entity_a,entity_b,(array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str(num_pps) + "]  "+"FROM "+self.arg['relation_table']+" WHERE type_a_"+type_a['name']+"@>'"\
        +type_a['type']+"' AND type_b_"+type_b['name']+"@>'"+ type_b['type']+"' AND relation_type='"+relation_type + "' GROUP BY entity_a,entity_b) x ORDER BY RANDOM() LIMIT " +str(num_edges)
        q = self.db.query(query_string)
        result = {'node_a': {}, 'node_b': {}, 'edges': []}
        for p in list(self.generate_random_walks(q.dictresult())):
            result['node_a'][p[0]] = []
            result['node_b'][p[1]] = []
            result['edges'].append({'source': p[0], 'target': p[1]})
        #print result
        return result

    def get_category_entities(self):
        """Dump a {mesh_category: [entity_name, ...]} map for every
        second-level MeSH category to category_entities.json.

        Reads mesh_type_hierarchy-2016.txt (tab-separated: id<TAB>name); ids
        of the form "X.Y" are second-level categories.  For each, collects
        entities whose entity_mesh array contains the category name (single
        quotes doubled for SQL).  The output file is now opened with a
        with-block at write time, so the handle is closed even on error and
        the file is not created/truncated when an earlier query fails (the
        original opened it eagerly and leaked it on exceptions).
        """
        category_entity = dict()
        with open("mesh_type_hierarchy-2016.txt") as fp:
            for line in fp:
                mesh_id = line.split('\t', 1)[0]
                # Second-level categories split into exactly two dot parts.
                if len(mesh_id.split('.')) == 2:
                    mesh = line.split('\t', 1)[1]
                    query_string = "SELECT entity_name from entity_table_slim where entity_mesh @> '{" + mesh.replace(
                        "'", "''") + "}'"
                    # print query_string
                    entities = self.db.query(query_string)
                    entities = entities.dictresult()
                    if len(entities) == 0:
                        continue
                    # mesh keeps its trailing newline here; [:-1] strips it
                    # for use as the output key.
                    category_entity[mesh[:-1]] = []
                    for entity in entities:
                        category_entity[mesh[:-1]].append(
                            entity['entity_name'])

        category_json = json.dumps(category_entity)
        with open("category_entities.json", 'w') as outputfile:
            outputfile.write(str(category_json))

    def get_relations(self,
                      type_a,
                      type_b,
                      entities_left=[],
                      entities_right=[]):
        try:
            entities_left = ast.literal_eval(entities_left)
            entities_right = ast.literal_eval(entities_right)
        except:
            pass

        query_string = "SELECT distinct relation_type, entity_a, entity_b FROM relation_table_slim WHERE type_a_mesh @> '{" + type_a.replace(
            "'", "''") + "}' AND type_b_mesh @> '{\
		" + type_b.replace("'", "''") + "}'"
        q = self.db.query(query_string)
        relations = q.dictresult()
        result = set()
        for relation in relations:
            if (not entities_left or
                (relation['entity_a'] in entities_left)) and (
                    (not entities_right) or
                    (relation['entity_b'] in entities_right)):
                result.add(relation['relation_type'])
        return {'relations': list(result)}

    def query_links_by_two_sides_entities(self,
                                          entities_left,
                                          entities_right,
                                          relation_type,
                                          num_edges=5,
                                          num_pps=1):
        """Build the link graph restricted to explicit entity lists on both
        sides.

        For every (left, right) pair, fetch up to num_pps+2 supporting
        [article_id, sent_id] pairs for relation_type; pairs with no support
        are dropped.  Returns {'node_a', 'node_b', 'edge'} where nodes map to
        sampled (title, sent, pmid) tuples and edges carry sentence ids plus
        resolved sentences.
        NOTE(review): num_edges is accepted but never used here; SQL is
        string-concatenated from entity names -- injection risk.
        """
        query_a = []
        query_b = []
        query_edges = []
        for entity_left in entities_left:
            for entity_right in entities_right:
                query_string = "SELECT (array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str(
                    num_pps + 2) + "] as sents FROM relation_table WHERE \
				entity_a='" + entity_left + "' and entity_b='" + entity_right + "' and relation_type='" + relation_type + "'"
                q = self.db.query(query_string)
                result = q.dictresult()
                # De-duplicate by article_id, keeping one sent per article.
                article = dict()
                if result[0]['sents'] != None:
                    for x in result[0]['sents']:
                        x = ast.literal_eval(x)
                        article[x[0]] = str(x[1])
                    print "article: ", article
                    query_a.append(entity_left)
                    query_b.append(entity_right)
                    query_edges.append({
                        'source': entity_left,
                        'target': entity_right,
                        'sids': article.values(),
                        'sents': []
                    })

        red_node = dict()

        # Attach up to num_pps random (title, sent, pmid) examples per
        # left-side node from the compressed entity table.
        for v in query_a:
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            red_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                red_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))

        blue_node = dict()

        # Same sampling for the right-side nodes.
        for v in query_b:
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            blue_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                blue_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))

        # Resolve the first num_pps sentence ids of each edge into full
        # (title, pmid, sent) rows.
        for edge in query_edges:
            for s_id in edge['sids'][:num_pps]:
                tmp = self.db.query(
                    "SELECT article_title, pmid, sent FROM entity_table WHERE sent_id = '"
                    + s_id + "' LIMIT 1")
                edge['sents'].append(tmp.dictresult()[0])

        return {'node_a': red_node, 'node_b': blue_node, 'edge': query_edges}

    def query_links_by_left_entities(self, entities_left, type_b,
                                     relation_type, num_edges, num_pps):
        """Build the link graph when only the left-side entity list is fixed
        and the right side is a type category.

        Fetches all (entity_a, entity_b) groups of relation_type whose right
        endpoint matches type_b, then assembles nodes/edges like the other
        query_links_by_* helpers.  Returns {'node_a', 'node_b', 'edge'}.
        NOTE(review): the membership check below only advances/breaks the
        counter; rows whose entity_a is NOT in entities_left are still
        appended -- confirm whether a `continue` was intended.
        """
        self.db.query("set statement_timeout TO 0")
        query_string = "SELECT * FROM (SELECT entity_a,entity_b,(array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str(
            num_pps + 2
        ) + "] as sents FROM " + self.arg[
            'relation_table'] + " WHERE type_b_" + type_b[
                'name'] + "@>'" + type_b[
                    'type'] + "' AND relation_type='" + relation_type + "' GROUP BY entity_a,entity_b) x"
        q = self.db.query(query_string)
        result = q.dictresult()

        query_a = []
        query_b = []
        query_edges = []
        count = 0

        for r in result:
            # Count matches against entities_left and stop after num_edges
            # of them (see NOTE in the docstring about non-matching rows).
            if r['entity_a'] in entities_left:
                if count < int(num_edges):
                    count = count + 1
                else:
                    break
            query_a.append(r['entity_a'])
            query_b.append(r['entity_b'])
            # De-duplicate supporting sentences by article_id.
            article = dict()
            for x in r['sents']:
                x = ast.literal_eval(x)
                article[x[0]] = str(x[1])
            query_edges.append({
                'source': r['entity_a'],
                'target': r['entity_b'],
                'sids': article.values(),
                'sents': []
            })

        red_node = dict()

        # Sample up to num_pps (title, sent, pmid) examples per left node.
        for v in query_a:
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            red_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                red_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))

        blue_node = dict()

        # Same sampling for the right-side nodes.
        for v in query_b:
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            blue_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                blue_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))

        # Resolve each edge's first num_pps sentence ids into full rows.
        for edge in query_edges:
            for s_id in edge['sids'][:num_pps]:
                tmp = self.db.query(
                    "SELECT article_title, pmid, sent FROM entity_table WHERE sent_id = '"
                    + s_id + "' LIMIT 1")
                edge['sents'].append(tmp.dictresult()[0])

        return {'node_a': red_node, 'node_b': blue_node, 'edge': query_edges}

    def query_links_by_right_entities(self, type_a, entities_right,
                                      relation_type, num_edges, num_pps):
        """Mirror of query_links_by_left_entities: the right-side entity list
        is fixed and the left side is a type category.

        Returns {'node_a', 'node_b', 'edge'}.
        NOTE(review): as in the left-entities variant, the membership check
        only throttles the counter; rows whose entity_b is NOT in
        entities_right are still appended -- confirm intent.
        """
        self.db.query("set statement_timeout TO 0")
        query_string = "SELECT * FROM (SELECT entity_a,entity_b,(array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str(
            num_pps + 2
        ) + "] as sents FROM " + self.arg[
            'relation_table'] + " WHERE type_a_" + type_a[
                'name'] + "@>'" + type_a[
                    'type'] + "' AND relation_type='" + relation_type + "' GROUP BY entity_a,entity_b) x"
        q = self.db.query(query_string)
        print "right entities"
        result = q.dictresult()
        query_a = []
        query_b = []
        query_edges = []
        count = 0
        for r in result:
            # Stop after num_edges rows whose right endpoint matches.
            if r['entity_b'] in entities_right:
                if count < int(num_edges):
                    count = count + 1
                else:
                    break
            print 'entity_a: ', r['entity_a']
            print 'entity_b: ', r['entity_b']
            query_a.append(r['entity_a'])
            query_b.append(r['entity_b'])
            # De-duplicate supporting sentences by article_id.
            article = dict()
            for x in r['sents']:
                x = ast.literal_eval(x)
                article[x[0]] = str(x[1])
            query_edges.append({
                'source': r['entity_a'],
                'target': r['entity_b'],
                'sids': article.values(),
                'sents': []
            })
        print 'query_a: ', query_a
        print 'query_b: ', query_b
        print 'query_edges: ', query_edges

        red_node = dict()

        # Sample up to num_pps (title, sent, pmid) examples per left node.
        for v in query_a:
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            red_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                red_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))

        blue_node = dict()

        # Same sampling for the right-side nodes.
        for v in query_b:
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            blue_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                blue_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))

        # Resolve each edge's first num_pps sentence ids into full rows.
        for edge in query_edges:
            for s_id in edge['sids'][:num_pps]:
                tmp = self.db.query(
                    "SELECT article_title, pmid, sent FROM entity_table WHERE sent_id = '"
                    + s_id + "' LIMIT 1")
                edge['sents'].append(tmp.dictresult()[0])

        return {'node_a': red_node, 'node_b': blue_node, 'edge': query_edges}

    def query_links_by_categories(self, type_a, type_b, relation_type,
                                  num_edges, num_pps):
        """Build the link graph when both sides are type categories.

        Picks num_edges random (entity_a, entity_b) groups of relation_type
        matching type_a/type_b (each a {'name', 'type'} dict), with up to
        num_pps+2 supporting [article_id, sent_id] pairs per edge, then
        attaches sampled example sentences to nodes and edges.  Logs timing
        for each phase against self.start_time.
        Returns {'node_a', 'node_b', 'edge'}.
        """
        self.db.query("set statement_timeout TO 0")
        query_string_v2 = "SELECT * FROM (SELECT entity_a,entity_b,(array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str(num_pps+2) + "] as sents  "+"FROM "+self.arg['relation_table']+" WHERE type_a_"+type_a['name']+"@>'"\
        +type_a['type']+"' AND type_b_"+type_b['name']+"@>'"+ type_b['type']+"' AND relation_type='"+relation_type + "' GROUP BY entity_a,entity_b) x ORDER BY RANDOM() LIMIT " +str(num_edges)
        print query_string_v2
        q = self.db.query(query_string_v2)

        # Phase timing: main edge query.
        temp_time = time.time() - self.start_time
        self.start_time = time.time()
        print "------query time cost = ", temp_time

        result = q.dictresult()

        query_a = []
        query_b = []
        query_edges = []
        for r in result:
            print 'entity_a: ', r['entity_a']
            print 'entity_b: ', r['entity_b']
            query_a.append(r['entity_a'])
            query_b.append(r['entity_b'])
            # De-duplicate supporting sentences by article_id.
            article = dict()
            for x in r['sents']:
                x = ast.literal_eval(x)
                article[x[0]] = str(x[1])
            query_edges.append({
                'source': r['entity_a'],
                'target': r['entity_b'],
                'sids': article.values(),
                'sents': []
            })
        print "query_a: ", query_a
        print "query_b: ", query_b

        red_node = dict()

        # Sample up to num_pps (title, sent, pmid) examples per left node.
        for v in query_a:
            # tmp=self.db.query("select article_title, pmid, sent from entity_table where entity_name= '" + v + "' LIMIT "+str(num_pps))
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            red_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                red_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))
            # red_node[v] = map(lambda x:(x['article_titles'],x['sents'],x['pmids']),tmp.dictresult())

        blue_node = dict()
        # Same sampling for the right-side nodes.
        for v in query_b:
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            blue_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                blue_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))

        # Phase timing: per-node example lookup.
        temp_time = time.time() - self.start_time
        self.start_time = time.time()
        print "------query entity_a and entity_b time cost = ", temp_time

        # Resolve each edge's first num_pps sentence ids into full rows.
        for edge in query_edges:
            for s_id in edge['sids'][:num_pps]:
                tmp = self.db.query(
                    "select article_title, pmid, sent from entity_table where sent_id= '"
                    + s_id + "' LIMIT 1")
                edge['sents'].append(tmp.dictresult()[0])
        temp_time = time.time() - self.start_time
        self.start_time = time.time()
        print "red_node nubmer = ", len(red_node), " blue_node number = ", len(
            blue_node), " edges number = ", len(query_edges)
        print "------query edges time cost = ", temp_time

        return {'node_a': red_node, 'node_b': blue_node, 'edge': query_edges}

    def query_links_v2(self,
                       type_a,
                       type_b,
                       relation_type,
                       entities_left=[],
                       entities_right=[],
                       num_edges=5,
                       num_pps=1):
        """Dispatch to the right query_links_by_* helper based on which
        entity lists are provided, then flatten its result to the
        {'edges', 'nodes'} shape the frontend expects.

        type_* and entities_* may arrive as python-literal strings; values
        literal_eval cannot parse pass through unchanged.  Node group 1 is
        the left side, group 2 the right.  The four branches are exhaustive
        over the (left empty?, right empty?) combinations.
        NOTE(review): the 'artitle_title' key is a typo but is kept --
        consumers may already depend on it.
        """
        try:
            type_a = ast.literal_eval(type_a)
            type_b = ast.literal_eval(type_b)
            entities_left = ast.literal_eval(entities_left)
            entities_right = ast.literal_eval(entities_right)
        except:
            pass
        if len(entities_left) > 0 and len(entities_right) > 0:
            print "two sides"
            result = self.query_links_by_two_sides_entities(
                entities_left, entities_right, relation_type, num_edges,
                num_pps)
        elif len(entities_left) == 0 and len(entities_right) == 0:
            print "categories"
            result = self.query_links_by_categories(type_a, type_b,
                                                    relation_type, num_edges,
                                                    num_pps)
        elif len(entities_left) == 0:
            print "right"
            result = self.query_links_by_right_entities(
                type_a, entities_right, relation_type, num_edges, num_pps)
        elif len(entities_right) == 0:
            print "left"
            result = self.query_links_by_left_entities(entities_left, type_b,
                                                       relation_type,
                                                       num_edges, num_pps)
        nodes = []
        # Flatten node_a (group 1) then node_b (group 2) into the nodes list,
        # unpacking each sampled (title, sent, pmid) tuple into a dict.
        if 'node_a' in result:
            for node_name in result['node_a']:
                node = dict()
                node['name'] = node_name
                node['sents'] = []
                for article in result['node_a'][node_name]:
                    temp = dict()
                    temp['artitle_title'] = article[0]
                    temp['sent'] = article[1]
                    temp['pmid'] = article[2]
                    node['sents'].append(temp)
                node['group'] = 1
                nodes.append(node)
        if 'node_b' in result:
            for node_name in result['node_b']:
                node = dict()
                node['name'] = node_name
                node['sents'] = []
                for article in result['node_b'][node_name]:
                    temp = dict()
                    temp['artitle_title'] = article[0]
                    temp['sent'] = article[1]
                    temp['pmid'] = article[2]
                    node['sents'].append(temp)
                node['group'] = 2
                nodes.append(node)
        # print json.dumps({'edges': result['edge'], 'nodes': nodes})
        return {'edges': result['edge'], 'nodes': nodes}

    def query_links(self,
                    type_a,
                    type_b,
                    relation_type,
                    num_edges=5,
                    num_pps=1):
        """Sample up to num_edges random entity-pair relations between two
        categories and attach up to num_pps supporting sentences to each
        node and edge.

        type_a / type_b: category descriptors with 'name' and 'type' keys
            (dict, or the str repr of such a dict).
        Returns {'node_a': {entity: [(title, sent, pmid), ...]},
                 'node_b': {entity: [...]},
                 'edge': [{'source', 'target', 'sids', 'sents'}, ...]}.
        """
        #type_a={'mesh':0, 'name':"Chemicals_and_Drugs"}
        # Best-effort decode of str reprs coming from the web layer; values
        # that are already dicts are left untouched via the bare except.
        try:
            type_a = ast.literal_eval(type_a)
            type_b = ast.literal_eval(type_b)

        except:
            pass
        print "type_a = ", type_a
        print "type_b = ", type_b
        print "relation_type = ", relation_type
        # These aggregation queries can run long; disable the server-side
        # statement timeout for this session.
        self.db.query("set statement_timeout TO 0")
        #query_string = "SELECT * INTO " +self.identity+ " FROM (SELECT DISTINCT ON(entity_a,entity_b) entity_a,entity_b,sent_id  "+"FROM "+self.arg['relation_table']+" WHERE type_a_"+type_a['name']+"@>'"\
        #+type_a['type']+"' AND type_b_"+type_b['name']+"@>'"+ type_b['type']+"' AND relation_type='"+relation_type + "') x ORDER BY RANDOM() LIMIT " +str(num_edges)
        # Collect up to num_pps+2 '[article_id,sent_id]' refs per entity pair,
        # then sample num_edges random pairs.
        # NOTE(review): query is built by string concatenation from request
        # input -- SQL injection risk; confirm callers only pass trusted values.
        query_string_v2 = "SELECT * FROM (SELECT entity_a,entity_b,(array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str(num_pps+2) + "] as sents  "+"FROM "+self.arg['relation_table']+" WHERE type_a_"+type_a['name']+"@>'"\
        +type_a['type']+"' AND type_b_"+type_b['name']+"@>'"+ type_b['type']+"' AND relation_type='"+relation_type + "' GROUP BY entity_a,entity_b) x ORDER BY RANDOM() LIMIT " +str(num_edges)
        print query_string_v2
        q = self.db.query(query_string_v2)

        temp_time = time.time() - self.start_time
        self.start_time = time.time()
        # print "------query time cost = ", temp_time

        result = q.dictresult()
        # print "result length = ", len(result)
        query_a = []
        query_b = []
        query_edges = []
        for r in result:
            query_a.append(r['entity_a'])
            query_b.append(r['entity_b'])
            article = dict()
            # Deduplicate sentence refs: keep one sent_id per distinct
            # article_id (each x parses to [article_id, sent_id]).
            for x in r['sents']:
                x = ast.literal_eval(x)
                article[x[0]] = str(x[1])
            query_edges.append({
                'source': r['entity_a'],
                'target': r['entity_b'],
                'sids': article.values(),
                'sents': []
            })

        # For every left-hand entity fetch up to num_pps example sentences.
        red_node = dict()
        # print "len query_a = ", len(query_a), "; len query_b = ", len(query_b), "; len query_edges", len(query_edges)
        for v in query_a:
            #print "select distinct on (article_id) article_title, pmid, sent from entity_table where entity_name= '" + v['entity_a'] + "' LIMIT "+str(num_pps)
            #print "select distinct on (article_id) article_title, pmid, sent from entity_table where entity_name= '" + v['entity_a'] + "' LIMIT 2"
            tmp = self.db.query(
                "select article_title, pmid, sent from entity_table where entity_name= '"
                + v + "' LIMIT " + str(num_pps))
            #tmp=self.db.query("select distinct on (article_id) article_title, pmid, sent from entity_table where entity_name= '" + v['entity_a'] + "' LIMIT "+str(num_pps))
            red_node[v] = map(
                lambda x: (x['article_title'], x['sent'], x['pmid']),
                tmp.dictresult())
        #query_b=self.db.query("select entity_b from "+self.identity)
        # Same lookup for every right-hand entity.
        blue_node = dict()
        for v in query_b:
            #pass
            #print "select distinct on (article_id) article_title, pmid, sent from entity_table where entity_name= '" + v['entity_b'] + "' LIMIT "+str(num_pps)
            tmp = self.db.query(
                "select article_title, pmid, sent from entity_table where entity_name= '"
                + v + "' LIMIT " + str(num_pps))
            #tmp=self.db.query("select distinct on (article_id) article_title, pmid, sent from entity_table where entity_name= '" + v['entity_b'] + "' LIMIT "+str(num_pps))
            blue_node[v] = map(
                lambda x: (x['article_title'], x['sent'], x['pmid']),
                tmp.dictresult())
        temp_time = time.time() - self.start_time
        self.start_time = time.time()
        # print "------query entity_a and entity_b time cost = ", temp_time

        #print red_node,blue_node
        # Resolve the first num_pps sentence ids of each edge into full
        # sentence records.
        for edge in query_edges:
            for s_id in edge['sids'][:num_pps]:
                tmp = self.db.query(
                    "select article_title, pmid, sent from entity_table where sent_id= '"
                    + s_id + "' LIMIT 1")
                edge['sents'].append(tmp.dictresult()[0])
        temp_time = time.time() - self.start_time
        self.start_time = time.time()
        # print "red_node nubmer = ", len(red_node), " blue_node number = ", len(blue_node), " edges number = ", len(query_edges)
        # print "------query edges time cost = ", temp_time

        #query_edge = "SELECT DISTINCT T.entity_a as source, T.entity_b as target, E.pmid, E.article_title,E.sent FROM " + self.arg['entity_table'] + " AS E INNER JOIN "+\
        #self.identity+" T ON E.sent_id = T.sent_id";
        #query_edges = "SELECT distinct on (R.article_id) R.article_id, R.sent_id from " + self.arg['relation_table']+ " AS R inner join " + self.identity+  \
        #" T on R.entity_a = T.entity_a and R.entity_b = T.entity_b and R.relation_type = \'" + relation_type +"' LIMIT " +str(num_pps)
        #"distinct on (article_id) inner join on entity_a, entity_b, relation_type"
        #q = self.db.query(query_edge)
        #self.db.query("drop table "+self.identity)
        return {'node_a': red_node, 'node_b': blue_node, 'edge': query_edges}
示例#19
0
from pg import DB
# Copy daily VM statistics (joined with their counter metadata) from
# hist_stat_daily into the vm_state table.
connection = DB(dbname='VM',
                host='192.168.1.112',
                port=5432,
                user='******',
                passwd='123456')
query_text = "select A.sample_time,A.stat_name,B.stat_rollup,B.unit,A.stat_group,A.entity,A.stat_value from hist_stat_daily A ,stat_counters B where A.stat_id=B.id "
# Target column for each position of the result tuple.
columns = ('time', 'stat_name', 'stat_rollup_type', 'unit',
           'stat_group', 'entity', 'stat_value')
for record in connection.query(query_text).getresult():
    connection.insert("vm_state", **dict(zip(columns, record)))
print("成功")
connection.close()
示例#20
0
class PostGreDBConnector:
    """PostGreDBConnector opens a PostGre DB connection. Different functions allow you to add, delete or update
    documents in PostGre DB.

    NOTE(review): queries are assembled by string concatenation, so every
    schema/table/clause argument must come from trusted code, never from
    user input (SQL injection risk).
    """

    def __init__(self):
        """Connecting to localhost (default host and port [localhost, 4532]) PostGre DB and initializing needed data
            base and tables."""
        try:
            print("Connecting to PostGre DB...")
            self.__db = DB(dbname='testdb', host='localhost', port=5432, user='******', passwd='superuser')
            print("PostGre DB connection successfully built.")
        except ConnectionError:
            print("PostGre DB connection could not be built.")
            # Re-raise instead of continuing: without a connection the calls
            # below would fail with a confusing AttributeError on self.__db.
            raise

        # Start from a clean slate: remove all data, then drop all tables.
        self.delete_all_data()
        self.drop_all_tables()

    def close_connection(self):
        """Close the underlying database connection."""
        self.__db.close()

    def create_schema(self, schema_name):
        """Create a new schema plus all tables and functions it needs."""
        self.__db.query("CREATE SCHEMA " + schema_name)
        self.__create_tables(schema_name)
        self.__create_functions(schema_name)

    def __create_tables(self, schema):
        """Create needed tables for RDF parsing."""
        schema += "."
        # base entities
        self._add_table("CREATE TABLE " + schema + "texts (id serial primary key, title text)")
        self._add_table(
            "CREATE TABLE " + schema + "bscale (id serial primary key, bscale text, nominal bool, ordinal bool, interval bool)")
        self._add_table("CREATE TABLE " + schema + "bsort (id serial primary key, bsort text)")
        self._add_table("CREATE TABLE " + schema + "pattern (id serial primary key, pattern text)")
        self._add_table("CREATE TABLE " + schema + "single_pattern (id serial primary key, single_pattern text)")
        self._add_table("CREATE TABLE " + schema + "snippets (id serial primary key, snippet text)")

        # relations
        self._add_table("CREATE TABLE " + schema + "has_attribute (bsort_id int, bscale_id integer[], aggregation int)")
        self._add_table("CREATE TABLE " + schema + "has_object (bscale_id int, pattern_id integer[], aggregation int)")
        self._add_table(
            "CREATE TABLE " + schema + "pattern_single_pattern (pattern_id int, single_pattern_id integer[], aggregation int)")
        self._add_table("CREATE TABLE " + schema + "texts_snippets (text_id int primary key, snippet_id integer[], aggregation int)")
        self._add_table(
            "CREATE TABLE " + schema + "snippet_offsets (id serial primary key,"
            " single_pattern_id int, snippet_id int, offsets integer[][], aggregation int)")

        # adjective and verb extractions
        self._add_table("CREATE TABLE " + schema + "subject_occ (id serial primary key, subject text, count int)")
        self._add_table("CREATE TABLE " + schema + "adjective_occ (id serial primary key, adjective text, count int)")
        self._add_table("CREATE TABLE " + schema + "verb_occ (id serial primary key, verb text, count int)")
        self._add_table("CREATE TABLE " + schema + "object_occ (id serial primary key, object text, count int)")
        self._add_table("CREATE TABLE " + schema + "subject_adjective_occ (id serial primary key, subject int, adjective int, count int, pmi float)")
        self._add_table("CREATE TABLE " + schema + "subject_object_occ (id serial primary key, subject int, object int, count int, pmi float)")
        self._add_table("CREATE TABLE " + schema + "object_verb_occ (id serial primary key, object int, verb int, count int, pmi float)")
        self._add_table("CREATE TABLE " + schema + "subject_verb_occ (id serial primary key, subject int, verb int, count int, pmi float)")

        # correlating pattern
        self._add_table("CREATE TABLE " + schema + "bscale_single_pattern (id serial primary key, bscale_id int, single_pattern_id int, single_pattern text, count int)")
        self._add_table(
            "CREATE TABLE " + schema + "correlating_pattern (id serial primary key, pattern_a int, pattern_b int, count int, pmi float)")

    def __create_functions(self, schema):
        """Create all necessary functions to aggregate the results saved in the database."""
        schema += "."
        self.add_function(schema + "aggregate_texts_snippets", "SELECT text_id, array_length(snippet_id, 1) FROM " + schema + "texts_snippets")
        self.add_function(schema + "aggregate_snippet_offsets", "SELECT id, array_length(offsets, 1) FROM " + schema + "snippet_offsets")

    def add_function(self, name, function):
        """Create a new function in the db.

        add_quotes (project helper) wraps the SQL body in quotes as the
        function definition requires.
        """
        query = "CREATE FUNCTION " + name + "() RETURNS SETOF RECORD AS " + add_quotes(function) + " LANGUAGE SQL"
        self.__db.query(query)

    def _add_table(self, query):
        """Create a new table with a query."""
        self.__db.query(query)

    def add_table(self, schema, name, rows):
        """Create a new table with a name and rows given in query form."""
        self.__db.query("CREATE TABLE " + schema + "." + name + rows)

    def insert(self, schema, table, row):
        """Insert a new row element into a specified table."""
        return self.__db.insert(schema + "." + table, row)

    def is_in_table(self, schema, table, where_clause):
        """Return True when at least one row matches where_clause."""
        q = "SELECT * FROM " + schema + "." + table + " WHERE " + where_clause
        return len(self.__db.query(q).dictresult()) > 0

    def update(self, schema, table, values, where_clause):
        """Update an entry in a specified table."""
        query = "UPDATE " + schema + "." + table + " SET " + values + " WHERE " + where_clause
        self.query(query)

    def get(self, schema, table, where_clause, key):
        """Return `key` of the first row matching where_clause, or None."""
        q = "SELECT " + key + " FROM " + schema + "." + table + " WHERE " + where_clause
        result = self.__db.query(q).dictresult()
        if result:
            return result[0][key]
        return None

    def get_data_from_table(self, schema, table):
        """Gets all data available in a specific table."""
        return self.__db.query("SELECT * FROM " + schema + "." + table).dictresult()

    def get_id(self, schema, table, where_clause):
        """Return the id of the first row matching where_clause, or None."""
        q = "SELECT id FROM " + schema + "." + table + " WHERE " + where_clause
        result = self.__db.query(q).dictresult()
        if result:
            return result[0]['id']
        return None

    def delete_from_table(self, schema, table, row):
        """Delete a row element from a specific table."""
        return self.__db.delete(schema + "." + table, row)

    def delete_data_in_table(self, schema, table):
        """Delete all data in a specific table."""
        self.__db.truncate(schema + "." + table, restart=True, cascade=True, only=False)

    def delete_all_data(self):
        """Deletes all data from all existing tables."""
        for table in self.get_tables():
            self.__db.truncate(str(table), restart=True, cascade=True, only=False)

    def get_tables(self):
        """Get all available tables in the database."""
        return self.__db.get_tables()

    def get_attributes(self, schema, table):
        """Get all attributes of a specified table."""
        return self.__db.get_attnames(schema + "." + table)

    def drop_table(self, schema, table):
        """Drops a specified table."""
        self.__db.query("DROP TABLE " + schema + "." + table)

    def drop_all_tables(self):
        """Drops all existing tables with a single DROP TABLE statement."""
        tables = self.get_tables()
        if tables:
            # join instead of quadratic string accumulation in a loop
            self.__db.query("DROP TABLE " + ", ".join(str(table) for table in tables))
        else:
            print("Nothing to delete.")

    def get_all(self, schema, table, attribute):
        """Gets one or more attributes of all entries from a specified table."""
        query = "SELECT " + attribute + " FROM " + schema + "." + table
        return self.__db.query(query).dictresult()

    def query(self, query):
        """Send a raw query to the database.

        Returns a list of row dicts for statements producing rows, and None
        otherwise. Command-status strings from non-SELECT statements are
        deliberately swallowed (the original implementation fell off the
        end for them); this version makes that explicit.
        """
        result = self.__db.query(query)
        if result is None or isinstance(result, str):
            return None
        return result.dictresult()
 if exist == []:
     #insert
     curId = curId + 1
     venue["id"] = curId
     venue["venue_name"] = row["venue_name"]
     venue["venue_add_comp"] = row["venue_add_comp"]
     if row["venue_add_comp"] is not None:
         (add, lat, lon) = geocode(FormatAddress(row["venue_add_comp"]))
     else:
         add = None
         lat = None
         lon = None
     venue["venue_address"] = add
     venue["lat"] = lat
     venue["lon"] = lon
     db.insert('city.venues', venue)
     print('INSERT VENUE', row["venue_name"])
     row["venue_id"] = curId
 elif None in exist[0][0:4]:
     #update
     venue["id"] = exist[0][2]
     row["venue_id"] = exist[0][2]
     venue["venue_name"] = row["venue_name"]
     venue["venue_add_comp"] = row["venue_add_comp"]
     if row["venue_add_comp"] is not None:
         (add, lat, lon) = geocode(FormatAddress(row["venue_add_comp"]))
     else:
         add = None
         lat = None
         lon = None
     venue["venue_address"] = add