def conn_to_rs(host, port, db, usr, pwd, opt=options, timeout=set_timeout_stmt):
    # Build a libpq connection string; opt and timeout default to
    # module-level settings defined elsewhere in this script.
    rs_conn_string = "host=%s port=%s dbname=%s user=%s password=%s %s" % (
        host, port, db, usr, pwd, opt)
    print("Connecting to %s:%s:%s as %s" % (host, port, db, usr))
    rs_conn = DB(dbname=rs_conn_string)
    rs_conn.query(timeout)
    return rs_conn
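
A minimal usage sketch follows. `options` and `set_timeout_stmt` are module-level names that the default arguments above rely on, so they must exist before the def is evaluated; the values shown here are placeholders, not part of the original.

# Placeholder settings assumed by conn_to_rs()'s default arguments (hypothetical).
options = "connect_timeout=10"
set_timeout_stmt = "SET statement_timeout TO 60000"  # 60 s

rs_conn = conn_to_rs("redshift.example.com", 5439, "analytics", "loader", "secret")
print(rs_conn.query("SELECT 1").getresult())
rs_conn.close()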
Example No. 2
def move_data(dbset):
    db = None  # so the finally clause is safe if DB(**dbset) itself raises
    try:
        db = DB(**dbset)
        db.begin()
        query = db.query("SELECT bluetooth.move_raw_data();")
        if query.getresult()[0][0] != 1:
            db.rollback()
            raise DatabaseError(
                'bluetooth.move_raw_data did not complete successfully')
        query = db.query("TRUNCATE bluetooth.raw_data;")
        query = db.query("SELECT king_pilot.load_bt_data();")
        if query.getresult()[0][0] != 1:
            db.rollback()
            raise DatabaseError(
                'king_pilot.load_bt_data did not complete successfully')
        db.query(
            'DELETE FROM king_pilot.daily_raw_bt WHERE measured_timestamp < now()::DATE;'
        )
        db.commit()
    except DatabaseError as dberr:
        LOGGER.error(dberr)
        db.rollback()
    except IntegrityError:
        LOGGER.critical(
            'Moving data failed due to violation of a constraint. Data will have to be moved manually'
        )
    finally:
        if db is not None:
            db.close()
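
move_data receives its connection settings as a mapping unpacked into DB(). A sketch of that mapping, with placeholder values; the keyword names match pg.DB's signature as used throughout these examples.

# Sketch of the dbset mapping consumed by DB(**dbset); values are placeholders.
dbset = {'dbname': 'bluetooth_db',
         'host': 'localhost',
         'user': 'bt_user',
         'passwd': 'secret'}
move_data(dbset)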
Example No. 3
def requestDB(requete='select * from ', tableName='planet_osm_line', base='champ',
              limit=None, only=False, verbose=False):
    """
    Execute an SQL request against a PostgreSQL database.
    ---
    parameters :
        string requete : the request to execute.
        string tableName : the table to query when only is False.
        string base : name of the database.
        int limit : optional LIMIT clause for the request.
        bool only : if True, run requete as-is without appending tableName.
        bool verbose : if True, print the tables and the header of tableName.
    produces :
        requeteResult : the request result, as returned by getresult().
    """
    db = DB(dbname=base)

    if verbose:
        tablesList = db.get_tables()
        print('Tables :')
        printPuces(tablesList)
        print('\n')

        headerLinesOSM = db.get_attnames(tableName)
        print('Table header :')
        printPuces(headerLinesOSM)
        print('\n')

    if only:
        requeteResult = db.query(requete).getresult()
    elif limit is None:
        requeteResult = db.query(requete + tableName).getresult()
    else:
        requeteResult = db.query(requete + tableName + ' limit ' + str(limit)).getresult()

    return requeteResult
Example No. 4
def run_db_query(
        *,
        database: str,
        host: str,
        port: int,
        username: str,
        password: str,
        query: str
) -> None:
    logging.info("Initializing DB connection")
    logging.info(
        f"DB params dbname={database}, host={host}, "
        f"port={port}, user={username}"
    )
    logging.info(f"query={query}")

    db = DB(
        dbname=database,
        host=host,
        port=port,
        user=username,
        passwd=password
    )

    logging.info("DB connection initialized")

    logging.info("Executing query")

    db.query(query)

    logging.info("Query executed")
Example No. 5
def run_analyze(table):
    global counter
    db = DB(dbname=vDatabase, host=vHost)
    db.query('analyze %s' % table)
    with counter.get_lock():
        counter.value += 1
    if counter.value % 10 == 0 or counter.value == total_tables:
        logging.info("%s tables completed out of %s tables", counter.value, total_tables)
    db.close()
Example No. 6
class SqlClient:

    def __init__(self):
        self.db = DB(dbname='refugee', host='localhost', user='******', passwd='1234')

    def insert_into(self, fingerprint_hash, name, dob, gender):
        # bind values as $n parameters; the original concatenated them
        # into the SQL unquoted, which breaks on any text value
        self.db.query(
            'INSERT INTO public.demographics(name, birth_date, fingerprint_hash, gender) '
            'VALUES ($1, $2, $3, $4);',
            (name, dob, fingerprint_hash, gender))

    def lookUp(self, hash):
        # SQL equality is '=', not '=='; rows come from getresult()
        res = self.db.query(
            'SELECT * FROM public.demographics WHERE fingerprint_hash = $1',
            (hash,))
        return res.getresult()[0]
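
A usage sketch; the field values are placeholders. The tuple passed as the second argument binds to the $n placeholders, the same PyGreSQL pattern the PostgresqlAdapter example further down uses.

client = SqlClient()
client.insert_into('ab12cd34', 'Amina', '1990-05-01', 'F')
print(client.lookUp('ab12cd34'))  # first matching demographics row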
Example No. 7
def update_configs(all_analyses, dbset):
    '''
    Syncs configs from blip server with database and returns configs to pull 
    data from. 
    :param all_analyses:
        List of blip configurations
    :param dbset:
        Dictionary to connect to PostgreSQL database
    '''

    db = DB(**dbset)
    db.begin()
    db.query('''TRUNCATE bluetooth.all_analyses_day_old;
    INSERT INTO bluetooth.all_analyses_day_old SELECT * FROM bluetooth.all_analyses;'''
             )
    db.commit()
    analyses_pull_data = {}
    for report in all_analyses:
        report.outcomes = [outcome.__json__() for outcome in report.outcomes]
        report.routePoints = [
            route_point.__json__() for route_point in report.routePoints
        ]
        row = dict(device_class_set_name=report.deviceClassSetName,
                   analysis_id=report.id,
                   minimum_point_completed=db.encode_json(
                       report.minimumPointCompleted.__json__()),
                   outcomes=report.outcomes,
                   report_id=report.reportId,
                   report_name=report.reportName,
                   route_id=report.routeId,
                   route_name=report.routeName,
                   route_points=report.routePoints)
        #If upsert fails, log error and continue, don't add analysis to analyses to pull
        try:
            upserted = db.upsert('bluetooth.all_analyses',
                                 row,
                                 pull_data='included.pull_data')
            analyses_pull_data[upserted['analysis_id']] = {
                'pull_data': upserted['pull_data'],
                'report_name': upserted['report_name']
            }
        except IntegrityError as err:
            LOGGER.error(err)

    db.close()

    analyses_to_pull = {
        analysis_id: analysis
        for (analysis_id, analysis) in analyses_pull_data.items()
        if analysis['pull_data']
    }
    return analyses_to_pull
Example No. 8
def healthcheck(self):
    self._generic_healthcheck()
    spec = self.get_spec()
    env = {e['name']: e['value'] for e in spec['containers'][0]['env']}
    user = env['POSTGRES_USER']
    passwd = env['POSTGRES_PASSWORD']
    db = DB(dbname=user,
            host=self.host,
            port=5432,
            user=user,
            passwd=passwd)
    sql = "create table test_table(id serial primary key, name varchar)"
    db.query(sql)
    assert_in('public.test_table', db.get_tables())
Example No. 9
def read_line():
    i = 0
    reader = csv.reader(f, delimiter='\t')
    db = DB(dbname="ngram", user="******", port=5432)
    for row in reader:
        # columns: ngram, year, match_count, page_count, volume_count
        sql = (sqlp1 + " " + tb_name + " " + sqlp2 + " (" + str(i) + ", "
               + "'" + row[0] + "'" + ", " + row[1] + ", " + row[2] + ", "
               + row[3] + ", " + row[4] + ");")
        print(sql)
        i = i + 1
        db.query(sql)
    # close once after the loop; the original closed inside it,
    # killing the connection after the first row
    db.close()
Example No. 10
def main(**kwargs):
    CONFIG = configparser.ConfigParser()
    CONFIG.read('db.cfg')
    dbset = CONFIG['DBSETTINGS']

    logger.info('Connecting to Database')
    db = DB(dbname=dbset['database'],
            host=dbset['host'],
            user=dbset['user'],
            passwd=dbset['password'])
    proxies = {'https': kwargs.get('proxy', None)}

    # Update Venue List
    venues = []
    curId = db.query('SELECT max(id) FROM city.venues').getresult()[0][0]

    logger.info('Updating venues table')
    venues, inserted_venues = update_venues(db, proxies, curId)

    # Get Events from List of Venues
    #cla = []
    logger.info('Finished updating venues tables, %s new venues inserted',
                inserted_venues)

    inserted_count = update_events(db, proxies, venues)
    logger.info('Finished processing events, %s events inserted',
                inserted_count)
    db.close()
Example No. 11
class db_query:
    def __init__(self, auth):
        self.auth = auth
        self.connect()

    def __del__(self):
        try:
            self.db.close()
        except Exception:
            pass  # connection may already be gone

    def connect(self):
        self.db = DB(dbname=self.auth['db'],
                     host=self.auth['host'],
                     port=self.auth['port'],
                     user=self.auth['user'],
                     passwd=self.auth['passwd'])

    def run_query(self, query, *args):
        # retry once after reconnecting, e.g. if the server dropped an
        # idle connection
        try:
            res = self.db.query(query)
        except Exception:
            self.connect()
            res = self.db.query(query)
        return res

    def array_query(self, query, *args):
        res = self.run_query(query)
        return res.dictresult()
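
A usage sketch of db_query; the auth keys mirror the ones connect() reads above, and the values are placeholders.

auth = {'db': 'mydb', 'host': 'localhost', 'port': 5432,
        'user': 'postgres', 'passwd': 'secret'}
client = db_query(auth)
rows = client.array_query('SELECT 1 AS one')
print(rows)  # [{'one': 1}]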
Example No. 12
class PostgresqlAdapter(object):
    def __init__(self, dbname, user, passwd, host="localhost", port=5432):
        self._db = DB(dbname=dbname,
                      host=host,
                      port=port,
                      user=user,
                      passwd=passwd)

    def create(self, inhabitant):
        record = self._db.insert('inhabitants', data=inhabitant)
        return _normalize(record)

    def find_all(self):
        all_inhabitants = self._db.query(
            'select * from inhabitants').dictresult()
        return [_normalize(x) for x in all_inhabitants]

    def find_by_id(self, id):
        inhabitant = self._find_by_id(id)
        return _normalize(inhabitant)

    def find_by_name(self, name):
        all_inhabitants = self._db.query(
            "SELECT * FROM inhabitants " +
            "WHERE data ->> 'FirstName' ILIKE '%' || $1 || '%' OR " +
            "      data ->> 'LastName' ILIKE '%' || $1 || '%'",
            (name, )).dictresult()
        return [_normalize(x) for x in all_inhabitants]

    def delete(self, id):
        try:
            self._db.delete('inhabitants', id=int(id))
        except ValueError:
            pass

    def update(self, inhabitant):
        id = inhabitant["Id"]
        self._find_by_id(id)
        del inhabitant["Id"]
        stored = self._db.update('inhabitants', id=id, data=inhabitant)
        return _normalize(stored)

    def _find_by_id(self, id):
        try:
            return self._db.get('inhabitants', int(id))
        except (ValueError, DatabaseError):
            raise NoRecordFound()
Example No. 13
def get_data(from_table, key, value):
    pg_db = DB(dbname=DATABASE_NAME,
               host=DATABASE_URL,
               port=int(DATABASE_PORT),
               user=DATABASE_USERNAME,
               passwd=DATABASE_PASSWORD)
    result = pg_db.query("select " + key + " , " + value + " from " + from_table)
    # 'row' rather than 'tuple', which shadows the builtin
    return [convert_tuples(row, key, value) for row in result.getresult()]
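
convert_tuples is not shown in this excerpt; a plausible minimal version, assuming it maps each two-column row onto a dict keyed by the requested column names.

# Hypothetical helper assumed by get_data(); the original is not shown.
def convert_tuples(row, key, value):
    return {key: row[0], value: row[1]}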
Example No. 14
def db_input(tweet):
    tweet = tweet.split(" ")
    hashtags = []
    words = []
    for word in tweet:
        if len(word) > 3 and special_match(word, plaintext):
            word = word.lower()
            if "#" == word[0]:
                hashtags.append(word)
            else:
                words.append(word)
    insert_update = ("with w as (insert into words (word) values ('{}') on conflict (word) do update set word = words.word returning (wid)), h as (insert into hashtags (hashtag) values ('{}') on conflict (hashtag) do update set hashtag = hashtags.hashtag returning (hid)) insert into links (wid, hid, weight) values ((select * from w), (select * from h), 1) on conflict (wid, hid) do update set weight = links.weight + 1")
    # keyword is 'port', not 'post' as in the original
    db = DB(dbname=database, host=host, port=int(port), user=user, passwd=password)
    db.begin()
    for word in words:
        for hashtag in hashtags:
            db.query(insert_update.format(word, hashtag))
    db.commit()
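
The statement above interpolates raw tweet tokens into the SQL. A sketch of the same upsert with the values bound as parameters instead, relying on PyGreSQL's query() accepting $n placeholders plus a tuple of arguments, as other examples here do; words, hashtags and db are the locals from db_input above.

# Same CTE-based upsert, with word and hashtag bound as $1/$2.
insert_update = (
    "with w as (insert into words (word) values ($1) "
    "on conflict (word) do update set word = words.word returning (wid)), "
    "h as (insert into hashtags (hashtag) values ($2) "
    "on conflict (hashtag) do update set hashtag = hashtags.hashtag returning (hid)) "
    "insert into links (wid, hid, weight) "
    "values ((select * from w), (select * from h), 1) "
    "on conflict (wid, hid) do update set weight = links.weight + 1")
for word in words:
    for hashtag in hashtags:
        db.query(insert_update, (word, hashtag))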
Example No. 15
def main(**kwargs):

    CONFIG = configparser.ConfigParser()
    CONFIG.read('db.cfg')
    dbset = CONFIG['DBSETTINGS']

    logger.info('Connecting to Database')
    db = DB(dbname=dbset['database'],
            host=dbset['host'],
            user=dbset['user'],
            passwd=dbset['password'])

    proxies = {'http': kwargs.get('proxy', None)}

    logger.info('Requesting data')
    r = requests.get(
        'http://app.toronto.ca/cc_sr_v1_app/data/edc_eventcal_APR',
        proxies=proxies)

    events = r.json()

    global CURID, ODID
    CURID = db.query('SELECT max(id) FROM city.venues').getresult()[0][0]
    ODID = db.query('SELECT max(od_id) FROM city.od_venues').getresult()[0][0]

    logger.info('Processing events')
    inserted_events, inserted_venues, updated_venues = 0, 0, 0

    for i, entry0 in enumerate(events):
        try:
            inserted_venue, updated_venue = process_event(
                i, entry0['calEvent'], db)
            inserted_events += 1
            inserted_venues += inserted_venue
            updated_venues += updated_venue
        except KeyError as key_error:
            logger.error('Key error with event: %s, key %s, skipping',
                         entry0['calEvent'].get('eventName',
                                                ''), key_error.args[0])

    logger.info('%s events processed, %s venues inserted, %s venues updated',
                inserted_events, inserted_venues, updated_venues)
    logger.info('closing connection to DB')
    db.close()
Example No. 16
def connect():
    parser = argparse.ArgumentParser()
    parser.add_argument('--host')
    parser.add_argument('--user')
    parser.add_argument('--passwd')
    parser.add_argument('--schema', default='mimiciii')
    parser.add_argument('--mode', default='all')
    args = parser.parse_args()

    # host = '162.105.146.246'
    # host = 'localhost'
    # schema = 'mimiciii'

    host = args.host
    user = args.user
    passwd = args.passwd
    logging.info('connect to %s, user = %s, search_path = %s' % (host, user, args.schema))
    db = DB(host=host, user=user, passwd=passwd)
    db.query('set search_path to %s' % (args.schema))
    return db
Example No. 17
def get_csv_data():
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # read connection parameters
        params = config()

        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = DB(**params)

        # execute a statement
        print('PostgreSQL ALL accepted users who signed up for team matching')
        q = conn.query(
            "SELECT * FROM users JOIN event_applications ON users.id = event_applications.user_id WHERE custom_fields ->> 'team_forming' = 'Yes, sign me up!' AND status = 'accepted';"
        )

        data = q.dictresult()

        f = StringIO()
        writer = csv.writer(f,
                            delimiter=',',
                            quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow(features)

        print(f'Adding {len(data)} entries to csv file')

        for row in data:
            str_id = str(row['user_id'])
            # get the MD5 hash of id
            result = hashlib.md5(str_id.encode())
            hashed_id = result.hexdigest()

            full_duration = (row['custom_fields'].get(
                'arrival_time', '') == "I'm staying for the entire event")
            user_features = [hashed_id, row['first_name'], row['last_name'], row['email'], row['phone'], full_duration, \
                           row['age'], row['pronoun'], row['university'], row['education_lvl'], row['major'], \
                           row['grad_year'], row['custom_fields'].get("travel", None), row['custom_fields'].get("programming_skills", None), \
                           row['custom_fields'].get("been_to_ttb", None), None, None, row['custom_fields'].get("linkedin_url", None), \
                           row['custom_fields'].get("github_url", None), row['custom_fields'].get("other_url", None), row['custom_fields'].get("how_did_you_hear", None), \
                           None, row['custom_fields'].get("programming_experience", None), row['custom_fields'].get("how_many_hackathons", None), None, \
                           row['custom_fields'].get("other_skills", None), row['custom_fields'].get("particular_topic", None), row['custom_fields'].get("goals", None), \
                           row['custom_fields'].get("experience_area", None), row['custom_fields'].get("teammate_preference", None), None]
            writer.writerow(user_features)
        # move the pointer back to beginning of file
        f.seek(0)
        return f
    except Exception as error:
        print("Error:", error)
    finally:
        if conn is not None:
            conn.close()
            print('Database connection closed.')
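
The config() helper used above is not shown. A hypothetical sketch, mirroring the configparser pattern the other examples here use for db.cfg; the section keys must match pg.DB's keyword arguments (dbname, host, port, user, passwd), and the filename and section name are assumptions.

# Hypothetical config() helper assumed by get_csv_data().
import configparser

def config(filename='db.cfg', section='DBSETTINGS'):
    parser = configparser.ConfigParser()
    parser.read(filename)
    params = dict(parser[section])
    if 'port' in params:
        params['port'] = int(params['port'])  # configparser returns strings
    return params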
Example No. 18
def read_data(device_id: str, temp: Optional[float], humid: Optional[float],
              light: Optional[float]):

    db = DB(dbname=dbname,
            host=dbhost,
            port=dbport,
            user=dbuser,
            passwd=dbpassword)
    table = "raw_data"
    local_received_time = datetime.datetime.utcnow()

    insert_statement = f"""INSERT INTO {table} (device_id,temp,humidity,light,timestamp_on_write)
                        VALUES ('{device_id}',{temp},{humid},{light},'{local_received_time}');
                        """

    # Write to PG DB
    try:
        db.query(insert_statement)
    except Exception:
        pass  # best-effort write; errors are deliberately swallowed

    # Push to Power BI if feature flag is True
    if stream_pb:
        try:
            payload = f"""
                [{{
                    "DateTime" :"{local_received_time}",
                    "Temperature" :{temp},
                    "Humidity" :{humid},
                    "Brightness" :{light},
                    "Device" :"{device_id}"
                }}]
                """

            body = bytes(payload, encoding='utf-8')
            # the f-strings above imply Python 3, where urllib2 is
            # urllib.request
            http_req = urllib.request.Request(streaming_url, body)
            response = urllib.request.urlopen(http_req)
        except Exception:
            print("Power BI ERROR")
    return "received"
Example No. 19
class Store:
    def __init__(self):
        self.connection = DB(dbname=environ.get('POSTGRESQL_ADDON_DB'),
                             host=environ.get('POSTGRESQL_ADDON_HOST'),
                             port=int(environ.get('POSTGRESQL_ADDON_PORT')),
                             user=environ.get('POSTGRESQL_ADDON_USER'),
                             passwd=environ.get('POSTGRESQL_ADDON_PASSWORD'))
        try:
            self.connection.query("SELECT * FROM stats")
        except ProgrammingError:
            print("Create Table")
            self.connection.query("""CREATE TABLE stats (timestamp TIMESTAMP PRIMARY KEY,value NUMERIC(5,2))""")
            # self.connection.query("truncate stats")

    def add(self, value):
        if float(value) > 0.1:
            self.connection.insert('stats', timestamp=get_current_time(), value=value)

    def display(self):
        self.connection.query("SELECT * FROM stats")

    def get_values(self, value=None):
        # compute the default per call: a default argument expression is
        # evaluated once at import time and would go stale
        if value is None:
            value = '{}-{}-{}'.format(datetime.now().year, datetime.now().month, datetime.now().day)
        result = self.connection.query(
            "SELECT extract(DOW FROM timestamp)::INT AS dow, timestamp, value FROM stats WHERE timestamp >= '" +
            value + "' AND timestamp < ('" + value + "'::DATE + '1 day'::INTERVAL) ORDER BY 2")
        return result

    def get_average_values(self, value=None):
        if value is None:
            value = '{}-{}-{}'.format(datetime.now().year, datetime.now().month, datetime.now().day)
        result = self.connection.query(
            "SELECT extract(DOW FROM timestamp)::INT AS dow, "
            "to_timestamp('" + value + " ' || "
            "LPAD(extract(HOUR FROM timestamp)::TEXT,2,'0') || ':' || "
            "LPAD(extract(MINUTE FROM timestamp)::TEXT,2,'0') || "
            "':00', 'YYYY-MM-DD HH24:MI:SS') AS timestamp, "
            "round(avg(value)::NUMERIC,2) AS value FROM stats "
            "GROUP BY 1,2 ORDER BY 1,2")

        return result
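
A usage sketch; it assumes the POSTGRESQL_ADDON_* environment variables point at a reachable PostgreSQL instance, and the date is a placeholder.

store = Store()
store.add(42.5)                        # values <= 0.1 are ignored
print(store.get_values('2024-01-15'))  # readings for one day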
Example No. 20
def getSchemaInfo(options, sql_tables, sql_references):
    """
    Connect to the database and retrieve our schema information.
    """
    conn = DB(options['dbname'], options['dbhost'], int(options['dbport']), user=options['dbuser'])
    res = conn.query(options['query']).dictresult()

    for row in res:
        ftbl = row['table_name']
        fcol = row['column_name']
        col_type = row['data_type']  # renamed from 'type' to avoid shadowing the builtin
        nullable = row['is_nullable']
        keytype = row['constraint_type']
        ttbl = row['referenced_table_name']
        tcol = row['referenced_column_name']

        if ftbl not in sql_tables:  # dict.has_key() was removed in Python 3
            sql_tables[ftbl] = []

        sql_tables[ftbl] += [[fcol, col_type, nullable, keytype]]

        if keytype == 'FOREIGN KEY':
            sql_references += [[ftbl, fcol, ttbl, tcol]]
Example No. 21
class PGDB(object):
    def __init__(self):
        pass

    def connect(self, dbname, host, port, user):
        self.db = DB(dbname=dbname, host=host, port=port, user=user)

    def connect_default(self):
        self.db = DB(dbname=DBNAME, host=HOST, port=PORT, user=USER)

    def close(self):
        self.db.close()
    
    def execute(self, sql):
        return self.db.query(sql)
    
    def drop_table(self, tbl):
        self.execute('DROP TABLE if exists %s' % tbl)

    def create_init_table(self, tbl):
        self.execute('CREATE TABLE %s (uid INT, iid INT, rating REAL)' % tbl)

    def copy(self, tbl, path, delimiter):
        self.execute("COPY %s FROM '%s' delimiter '%s'" % (tbl, path, delimiter))
Example No. 22
src = cfg[args.src]

print("")
print("------------------------------------------------------------------------------------")
print("Source: " + str(args.src))
print("------------------------------------------------------------------------------------")

srcdb = DB(dbname=src["db"], host=src["host"], port=int(src["port"]), user=src["user"], passwd=src["password"])

for srv in args.dst:
    item = cfg[srv]
    print("")
    print("------------------------------------------------------------------------------------")
    print("Destination: " + str(srv))
    print("------------------------------------------------------------------------------------")
    dstdb = DB(dbname=item["db"], host=item["host"], port=int(item["port"]), user=item["user"], passwd=item["password"])

    for table in tables:
        dstdb.start()
        rows = srcdb.query('SELECT * FROM %s' % table).getresult()
        dstdb.query('CREATE TEMPORARY TABLE newvals ON COMMIT DROP AS TABLE %s WITH NO DATA' % table)
        dstdb.inserttable('newvals', rows)
        dstdb.query('LOCK TABLE %s IN EXCLUSIVE MODE' % table)
        print(upd.get(table))
        dstdb.query(upd.get(table))
        print(insert.get(table))
        dstdb.query(insert.get(table))
        dstdb.commit()
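
This script assumes cfg, tables, upd and insert are defined earlier. A hypothetical sketch of the per-table statements it expects, implementing the classic temp-table upsert against the 'newvals' table created inside the loop; the 'users' table and its columns are placeholders.

# Hypothetical shape of the mappings used above.
tables = ['users']
upd = {'users': "UPDATE users SET name = newvals.name "
                "FROM newvals WHERE users.id = newvals.id"}
insert = {'users': "INSERT INTO users "
                   "SELECT newvals.* FROM newvals "
                   "LEFT JOIN users ON users.id = newvals.id "
                   "WHERE users.id IS NULL"}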


Example No. 23
from pg import DB
import random

import configparser
config = configparser.ConfigParser()
config.read('config.ini')

# connect to the DB with the credentials from config.ini
db = DB(host=config['mysqlDB']['hostname'],
        user=config['mysqlDB']['username'],
        passwd=config['mysqlDB']['password'],
        dbname=config['mysqlDB']['database'])


def new_user_id():
    user_rand_gen_id = random.randint(100000000, 1000000000)
    return user_rand_gen_id


print(db.query("SELECT * FROM users"))
Example No. 24
class data_utils(object):
    """docstring for db_utils"""
    def __init__(self, arg):
        #super(db_utils, self).__init__()
        self.arg = arg
        self.db = DB(dbname='structnet_complete',
                     user='******',
                     passwd='structnet',
                     host='localhost')
        #self.identity = ''.join(random.choice(string.ascii_lowercase) for _ in range(8))
        #print self.identity
        print "Conncted!"
        self.start_time = time.time()

    def load_pmids(self):
        f = open(self.arg['corpus_map'], 'rb')
        self.pmid_dict = marshal.load(f)
        f.close()
        print "PMID loaded!"

    def insert_prediction(self):
        cnt = 0
        # 527.8M lines
        with open(self.arg['data_file'], 'r') as IN:
            for line in IN:
                #try:
                if cnt % 50000 == 0:
                    print self.arg['data_file'], "process ", cnt, " lines"
                cnt += 1
                tmp = json.loads(line)
                self.db.insert(self.arg['prediction_table'],
                               entity_a=tmp['em1Text'],
                               entity_b=tmp['em2Text'],
                               relation_type=tmp['label'],
                               score=tmp['score'])

    def insert_query(self):
        with open(self.arg['data_file']) as IN:
            line_num = 0
            for line in IN:
                print line_num
                tmp = line.split('\t')
                target_type = ast.literal_eval(tmp[0])[0]
                if '[' in tmp[3]:
                    output_types = ast.literal_eval(tmp[3])
                else:
                    output_types = [tmp[3]]
                relation_type = tmp[2]
                self.db.insert(self.arg['query_table'],
                               target_type=target_type,
                               output_types=output_types,
                               index=line_num,
                               relation_type=relation_type)
                line_num += 1

    def insert_caseolap(self, index_number):
        with open(self.arg['data_file'] + str(index_number) + '_c.txt') as IN:
            for line in IN:
                tmp = line.split('\t')
                score_list = tmp[1].strip().lstrip('[').rstrip(']').split(',')
                for ele in score_list:
                    temp = ele.strip().split('|')
                    if len(temp) != 2:
                        continue
                    self.db.insert(self.arg['caseolap_table'],
                                   doc_id=index_number,
                                   sub_type=tmp[0],
                                   entity=temp[0],
                                   score=float(temp[1]))

    def query_prediction(self, name_a, name_b, relation_type):
        self.db.query("set statement_timeout TO 0")
        query_string = "SELECT score FROM "+self.arg['prediction_table']+" WHERE entity_a=\'" + name_a +"\' AND entity_b=\'" + \
        name_b + "\' AND relation_type=\'" + relation_type + "\'"
        #query_em_a = "SELECT sent_id FROM"+self.arg['entity_table']+" WHERE entity_name=\'"+name_a+"\' GROUP BY article_id"
        q = self.db.query(query_string)
        if len(q.dictresult()) == 0:
            return 0
        else:
            return q.dictresult()[0]['score']

    def query_prediction_v2(self, name_a, name_b, relation_type):
        query_string = "SELECT score FROM "+self.arg['prediction_table']+" WHERE entity_a=\'" + name_a +"\' AND entity_b=\'" + \
        name_b + "\' AND relation_type=\'" + relation_type + "\'"

        query_ems = "SELECT L.article_title, L.pmid, L.sent FROM entity_table as L INNER JOIN (select sent_id FROM entity_table where entity_name=\'"+name_a+\
         "\') AS R ON L.sent_id=R.sent_id where entity_name=\'"+name_b+"\' LIMIT 1"
        #set_a = set(map(lambda x:x['sent_id'], self.db.query(query_em_a).dictresult()))
        ##query_em_b = "SELECT distinct sent_id FROM entity_table WHERE entity_name=\'"+name_b+"\'"
        #set_b = set(map(lambda x:x['sent_id'], self.db.query(query_em_b).dictresult()))
        #print set_a.intersection(set_b)
        self.db.query("set statement_timeout TO 100")
        try:
            result = self.db.query(query_ems).dictresult()[0]
        except Exception:
            result = {}
        q = self.db.query(query_string)
        if len(q.dictresult()) == 0:
            result['score'] = 0
        else:
            result['score'] = q.dictresult()[0]['score']
        return result
        #print query_string

    def query_distinctive(self,
                          target_type,
                          output_types,
                          relation_type,
                          sub_types,
                          num_records=8):
        self.db.query("set statement_timeout TO 0")
        sub_types = ast.literal_eval(sub_types)
        result = []
        query_string = "SELECT index FROM query_table WHERE target_type @@ \'" + target_type + "\' AND output_types@>\'" +\
        output_types+"\' and relation_type=\'" + relation_type +"\'"
        #print query_string
        idx = self.db.query(query_string).dictresult()[0]['index']
        for sub_type in sub_types:
            query_string = "SELECT entity,score FROM " + self.arg['caseolap_table'] + " WHERE doc_id=" + str(idx) + \
            " AND sub_type @@ \'" + sub_type + "\' ORDER BY score LIMIT " + str(num_records)
            q = self.db.query(query_string)
            result.append(q.dictresult())
        return result

    def query_distinctive_v2(self,
                             target_type,
                             output_types,
                             relation_type,
                             sub_types,
                             num_records=8):
        self.db.query("set statement_timeout TO 0")
        sub_types = ast.literal_eval(sub_types)
        result = []
        pmid_result = []
        query_string = "SELECT index FROM query_table WHERE target_type @@ \'" + target_type + "\' AND output_types@>\'" +\
        output_types+"\' and relation_type=\'" + relation_type +"\'"
        #print query_string
        if 'MeSH' in target_type:
            type_b_name = 'type_b_mesh'
        else:
            type_b_name = 'type_b_umls'

        if 'MeSH' in output_types:
            type_a_name = 'type_a_mesh'
        else:
            type_a_name = 'type_a_umls'
        idx = self.db.query(query_string).dictresult()[0]['index']
        for sub_type in sub_types:
            query_string = "SELECT entity,score FROM " + self.arg['caseolap_table'] + " WHERE doc_id=" + str(idx) + \
            " AND sub_type @@ \'" + sub_type + "\' ORDER BY score LIMIT " + str(num_records)
            q = self.db.query(query_string)

            type_target = sub_type.split('::')[0]
            #print type_target
            #entity_list = q.dictresult()
            pmids = []
            for em in q.dictresult():
                qq = "select pmid from entity_table where sent_id = (select sent_id FROM relation_table WHERE entity_a=\'" +em['entity'] +\
                "\' AND relation_type = '"+relation_type+"' LIMIT 1) LIMIT 1"
                #"\' AND relation_type = '"+relation_type+"' AND "+type_b_name+"@>'{"+type_target+"}' LIMIT 1"
                em_result = self.db.query(qq).dictresult()
                if len(em_result) > 0:
                    em_result[0]['entity'] = em['entity']
                    pmids.append(em_result[0])
            result.append(q.dictresult())
            pmid_result.append(pmids)
        return result, pmid_result

    def insert_record(self):
        cnt = 0
        # 527.8M lines
        with open(self.arg['data_file'], 'r') as IN:
            for line in IN:
                #try:
                if cnt % 5000 == 0:
                    print self.arg['data_file'], "process ", cnt, " lines"
                cnt += 1
                tmp = json.loads(line)
                type_dict = {}
                for e in tmp['entityMentions']:
                    aid = str(tmp['articleId'])
                    mesh = []
                    umls = []
                    go = []
                    sub_type = defaultdict(list)
                    for l in e['label']:
                        if 'MeSH' in l:
                            mesh.append(l.replace('MeSH:::', ''))
                        elif 'GO' in l:
                            go.append(l.replace('GO:::', ''))
                        else:
                            umls.append(l)
                    if len(go) > 0:
                        sub_type['go'] = True
                    else:
                        sub_type['go'] = False
                    if len(mesh) > 0:
                        sub_type['mesh'] = mesh[0].split('::')
                    sub_type['umls'] = umls
                    type_dict[e['start']] = sub_type

                    assert (len(self.pmid_dict[aid]) < 9)
                    self.db.insert(self.arg['entity_table'],
                                   entity_name=e['text'],
                                   sent_id=tmp['sentId'],
                                   article_id=tmp['articleId'],
                                   pmid=self.pmid_dict[aid][0],
                                   article_title=self.pmid_dict[aid][1],
                                   sent=tmp['sentText'])
                for r in tmp['relationMentions']:
                    self.db.insert(
                        self.arg['relation_table'],
                        entity_a=r['em1Text'],
                        entity_b=r['em2Text'],
                        relation_type=r['label'],
                        a_is_gene=type_dict[r['em1Start']]['go'],
                        b_is_gene=type_dict[r['em2Start']]['go'],
                        type_a_umls=type_dict[r['em1Start']]['umls'],
                        type_b_umls=type_dict[r['em2Start']]['umls'],
                        type_a_mesh=type_dict[r['em1Start']]['mesh'],
                        type_b_mesh=type_dict[r['em2Start']]['mesh'],
                        sent_id=tmp['sentId'],
                        article_id=tmp['articleId'])
                #except:
                #print cnt

    def generate_random_walks(self, edge_pairs, num_walks=3, num_steps=3):
        print len(edge_pairs)
        outgoing_edges = defaultdict(list)
        ingoing_edges = defaultdict(list)
        map(lambda x: outgoing_edges[x['entity_a']].append(x['entity_b']),
            edge_pairs)
        map(lambda x: ingoing_edges[x['entity_b']].append(x['entity_a']),
            edge_pairs)
        for k, v in outgoing_edges.iteritems():
            if len(v) > 0:
                v.append(k)
        #print outgoing_edges
        #print ingoing_edges
        #print outgoing_edges
        temp = max(outgoing_edges.values(), key=len)
        origin = temp[-1]
        walk_nodes = [origin]
        total_nodes = set([origin])
        edges = set()
        for i in xrange(num_steps):
            #print "++++++++++++++++++++",i
            next_step = []
            while len(walk_nodes) > 0:
                node = walk_nodes.pop()
                if i % 2 == 0:
                    node_list = outgoing_edges[node][:-1]
                    assert (len(node_list) > 0)
                else:
                    node_list = ingoing_edges[node]
                    assert (len(node_list) > 0)
                #print node
                if len(node_list) <= num_walks:
                    next_step.extend(node_list)
                    edges = edges.union(
                        list(map(lambda x: (node, x), node_list)))
                else:
                    #print "random_sample",node_list
                    random_nodes = random.sample(node_list, num_walks)
                    next_step.extend(random_nodes)
                    edges = edges.union(
                        list(map(lambda x: (node, x), random_nodes)))
            assert (len(walk_nodes) == 0)
            walk_nodes = list(set(next_step))
            total_nodes = total_nodes.union(set(next_step))
        #print total_nodes
        print len(edges)
        return edges
        #print edges

    def query_links_with_walk(self,
                              type_a,
                              type_b,
                              relation_type,
                              num_edges=5,
                              num_pps=1):
        try:
            type_a = ast.literal_eval(type_a)
            type_b = ast.literal_eval(type_b)
        except (ValueError, SyntaxError):
            pass  # already parsed, not string literals
        #print num_edges
        #query_string = "SELECT * FROM (SELECT DISTINCT ON(entity_a,entity_b) entity_a,entity_b,sent_id  "+"FROM "+self.arg['relation_table']+" WHERE type_a_"+type_a['name']+"@>'"\
        #+type_a['type']+"' AND type_b_"+type_b['name']+"@>'"+ type_b['type']+"' AND relation_type='"+relation_type + "') x ORDER BY RANDOM() LIMIT 500"
        query_string = "SELECT * FROM (SELECT entity_a,entity_b,(array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str(num_pps) + "]  "+"FROM "+self.arg['relation_table']+" WHERE type_a_"+type_a['name']+"@>'"\
        +type_a['type']+"' AND type_b_"+type_b['name']+"@>'"+ type_b['type']+"' AND relation_type='"+relation_type + "' GROUP BY entity_a,entity_b) x ORDER BY RANDOM() LIMIT " +str(num_edges)
        q = self.db.query(query_string)
        result = {'node_a': {}, 'node_b': {}, 'edges': []}
        for p in list(self.generate_random_walks(q.dictresult())):
            result['node_a'][p[0]] = []
            result['node_b'][p[1]] = []
            result['edges'].append({'source': p[0], 'target': p[1]})
        #print result
        return result

    def get_category_entities(self):
        outputfile = open("category_entities.json", 'w')
        category_entity = dict()
        with open("mesh_type_hierarchy-2016.txt") as fp:
            for line in fp:
                mesh_id = line.split('\t', 1)[0]
                if len(mesh_id.split('.')) == 2:
                    mesh = line.split('\t', 1)[1]
                    query_string = "SELECT entity_name from entity_table_slim where entity_mesh @> '{" + mesh.replace(
                        "'", "''") + "}'"
                    # print query_string
                    entities = self.db.query(query_string)
                    entities = entities.dictresult()
                    if len(entities) == 0:
                        continue
                    category_entity[mesh[:-1]] = []
                    for entity in entities:
                        category_entity[mesh[:-1]].append(
                            entity['entity_name'])

        category_json = json.dumps(category_entity)
        outputfile.write(str(category_json))
        outputfile.close()

    def get_relations(self,
                      type_a,
                      type_b,
                      entities_left=[],
                      entities_right=[]):
        try:
            entities_left = ast.literal_eval(entities_left)
            entities_right = ast.literal_eval(entities_right)
        except (ValueError, SyntaxError):
            pass  # already lists, not string literals

        query_string = "SELECT distinct relation_type, entity_a, entity_b FROM relation_table_slim WHERE type_a_mesh @> '{" + type_a.replace(
            "'", "''") + "}' AND type_b_mesh @> '{\
		" + type_b.replace("'", "''") + "}'"
        q = self.db.query(query_string)
        relations = q.dictresult()
        result = set()
        for relation in relations:
            if (not entities_left or
                (relation['entity_a'] in entities_left)) and (
                    (not entities_right) or
                    (relation['entity_b'] in entities_right)):
                result.add(relation['relation_type'])
        return {'relations': list(result)}

    def query_links_by_two_sides_entities(self,
                                          entities_left,
                                          entities_right,
                                          relation_type,
                                          num_edges=5,
                                          num_pps=1):
        query_a = []
        query_b = []
        query_edges = []
        for entity_left in entities_left:
            for entity_right in entities_right:
                query_string = "SELECT (array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str(
                    num_pps + 2) + "] as sents FROM relation_table WHERE \
				entity_a='" + entity_left + "' and entity_b='" + entity_right + "' and relation_type='" + relation_type + "'"
                q = self.db.query(query_string)
                result = q.dictresult()
                article = dict()
                if result[0]['sents'] is not None:
                    for x in result[0]['sents']:
                        x = ast.literal_eval(x)
                        article[x[0]] = str(x[1])
                    print "article: ", article
                    query_a.append(entity_left)
                    query_b.append(entity_right)
                    query_edges.append({
                        'source': entity_left,
                        'target': entity_right,
                        'sids': article.values(),
                        'sents': []
                    })

        red_node = dict()

        for v in query_a:
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            red_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                red_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))

        blue_node = dict()

        for v in query_b:
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            blue_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                blue_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))

        for edge in query_edges:
            for s_id in edge['sids'][:num_pps]:
                tmp = self.db.query(
                    "SELECT article_title, pmid, sent FROM entity_table WHERE sent_id = '"
                    + s_id + "' LIMIT 1")
                edge['sents'].append(tmp.dictresult()[0])

        return {'node_a': red_node, 'node_b': blue_node, 'edge': query_edges}

    def query_links_by_left_entities(self, entities_left, type_b,
                                     relation_type, num_edges, num_pps):
        self.db.query("set statement_timeout TO 0")
        query_string = "SELECT * FROM (SELECT entity_a,entity_b,(array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str(
            num_pps + 2
        ) + "] as sents FROM " + self.arg[
            'relation_table'] + " WHERE type_b_" + type_b[
                'name'] + "@>'" + type_b[
                    'type'] + "' AND relation_type='" + relation_type + "' GROUP BY entity_a,entity_b) x"
        q = self.db.query(query_string)
        result = q.dictresult()

        query_a = []
        query_b = []
        query_edges = []
        count = 0

        for r in result:
            if r['entity_a'] in entities_left:
                if count < int(num_edges):
                    count = count + 1
                else:
                    break
            query_a.append(r['entity_a'])
            query_b.append(r['entity_b'])
            article = dict()
            for x in r['sents']:
                x = ast.literal_eval(x)
                article[x[0]] = str(x[1])
            query_edges.append({
                'source': r['entity_a'],
                'target': r['entity_b'],
                'sids': article.values(),
                'sents': []
            })

        red_node = dict()

        for v in query_a:
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            red_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                red_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))

        blue_node = dict()

        for v in query_b:
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            blue_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                blue_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))

        for edge in query_edges:
            for s_id in edge['sids'][:num_pps]:
                tmp = self.db.query(
                    "SELECT article_title, pmid, sent FROM entity_table WHERE sent_id = '"
                    + s_id + "' LIMIT 1")
                edge['sents'].append(tmp.dictresult()[0])

        return {'node_a': red_node, 'node_b': blue_node, 'edge': query_edges}

    def query_links_by_right_entities(self, type_a, entities_right,
                                      relation_type, num_edges, num_pps):
        self.db.query("set statement_timeout TO 0")
        query_string = "SELECT * FROM (SELECT entity_a,entity_b,(array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str(
            num_pps + 2
        ) + "] as sents FROM " + self.arg[
            'relation_table'] + " WHERE type_a_" + type_a[
                'name'] + "@>'" + type_a[
                    'type'] + "' AND relation_type='" + relation_type + "' GROUP BY entity_a,entity_b) x"
        q = self.db.query(query_string)
        print "right entities"
        result = q.dictresult()
        query_a = []
        query_b = []
        query_edges = []
        count = 0
        for r in result:
            if r['entity_b'] in entities_right:
                if count < int(num_edges):
                    count = count + 1
                else:
                    break
            print 'entity_a: ', r['entity_a']
            print 'entity_b: ', r['entity_b']
            query_a.append(r['entity_a'])
            query_b.append(r['entity_b'])
            article = dict()
            for x in r['sents']:
                x = ast.literal_eval(x)
                article[x[0]] = str(x[1])
            query_edges.append({
                'source': r['entity_a'],
                'target': r['entity_b'],
                'sids': article.values(),
                'sents': []
            })
        print 'query_a: ', query_a
        print 'query_b: ', query_b
        print 'query_edges: ', query_edges

        red_node = dict()

        for v in query_a:
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            red_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                red_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))

        blue_node = dict()

        for v in query_b:
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            blue_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                blue_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))

        for edge in query_edges:
            for s_id in edge['sids'][:num_pps]:
                tmp = self.db.query(
                    "SELECT article_title, pmid, sent FROM entity_table WHERE sent_id = '"
                    + s_id + "' LIMIT 1")
                edge['sents'].append(tmp.dictresult()[0])

        return {'node_a': red_node, 'node_b': blue_node, 'edge': query_edges}

    def query_links_by_categories(self, type_a, type_b, relation_type,
                                  num_edges, num_pps):
        self.db.query("set statement_timeout TO 0")
        query_string_v2 = "SELECT * FROM (SELECT entity_a,entity_b,(array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str(num_pps+2) + "] as sents  "+"FROM "+self.arg['relation_table']+" WHERE type_a_"+type_a['name']+"@>'"\
        +type_a['type']+"' AND type_b_"+type_b['name']+"@>'"+ type_b['type']+"' AND relation_type='"+relation_type + "' GROUP BY entity_a,entity_b) x ORDER BY RANDOM() LIMIT " +str(num_edges)
        print query_string_v2
        q = self.db.query(query_string_v2)

        temp_time = time.time() - self.start_time
        self.start_time = time.time()
        print "------query time cost = ", temp_time

        result = q.dictresult()

        query_a = []
        query_b = []
        query_edges = []
        for r in result:
            print 'entity_a: ', r['entity_a']
            print 'entity_b: ', r['entity_b']
            query_a.append(r['entity_a'])
            query_b.append(r['entity_b'])
            article = dict()
            for x in r['sents']:
                x = ast.literal_eval(x)
                article[x[0]] = str(x[1])
            query_edges.append({
                'source': r['entity_a'],
                'target': r['entity_b'],
                'sids': article.values(),
                'sents': []
            })
        print "query_a: ", query_a
        print "query_b: ", query_b

        red_node = dict()

        for v in query_a:
            # tmp=self.db.query("select article_title, pmid, sent from entity_table where entity_name= '" + v + "' LIMIT "+str(num_pps))
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            red_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                red_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))
            # red_node[v] = map(lambda x:(x['article_titles'],x['sents'],x['pmids']),tmp.dictresult())

        blue_node = dict()
        for v in query_b:
            tmp = self.db.query(
                "SELECT article_titles, pmids, sents FROM entity_table_compressed WHERE entity_name= '"
                + v + "' LIMIT " + str(1))
            dictresult = tmp.dictresult()[0]
            blue_node[v] = []
            random_nums = random.sample(
                range(0, len(dictresult['article_titles'])),
                min(len(dictresult['article_titles']), num_pps))
            for i in range(0, len(random_nums)):
                blue_node[v].append(
                    (dictresult['article_titles'][random_nums[i]],
                     dictresult['sents'][random_nums[i]],
                     dictresult['pmids'][random_nums[i]]))

        temp_time = time.time() - self.start_time
        self.start_time = time.time()
        print "------query entity_a and entity_b time cost = ", temp_time

        for edge in query_edges:
            for s_id in edge['sids'][:num_pps]:
                tmp = self.db.query(
                    "select article_title, pmid, sent from entity_table where sent_id= '"
                    + s_id + "' LIMIT 1")
                edge['sents'].append(tmp.dictresult()[0])
        temp_time = time.time() - self.start_time
        self.start_time = time.time()
        print "red_node nubmer = ", len(red_node), " blue_node number = ", len(
            blue_node), " edges number = ", len(query_edges)
        print "------query edges time cost = ", temp_time

        return {'node_a': red_node, 'node_b': blue_node, 'edge': query_edges}

    def query_links_v2(self,
                       type_a,
                       type_b,
                       relation_type,
                       entities_left=[],
                       entities_right=[],
                       num_edges=5,
                       num_pps=1):
        try:
            type_a = ast.literal_eval(type_a)
            type_b = ast.literal_eval(type_b)
            entities_left = ast.literal_eval(entities_left)
            entities_right = ast.literal_eval(entities_right)
        except (ValueError, SyntaxError):
            pass  # already parsed, not string literals
        if len(entities_left) > 0 and len(entities_right) > 0:
            print "two sides"
            result = self.query_links_by_two_sides_entities(
                entities_left, entities_right, relation_type, num_edges,
                num_pps)
        elif len(entities_left) == 0 and len(entities_right) == 0:
            print "categories"
            result = self.query_links_by_categories(type_a, type_b,
                                                    relation_type, num_edges,
                                                    num_pps)
        elif len(entities_left) == 0:
            print "right"
            result = self.query_links_by_right_entities(
                type_a, entities_right, relation_type, num_edges, num_pps)
        elif len(entities_right) == 0:
            print "left"
            result = self.query_links_by_left_entities(entities_left, type_b,
                                                       relation_type,
                                                       num_edges, num_pps)
        nodes = []
        if 'node_a' in result:
            for node_name in result['node_a']:
                node = dict()
                node['name'] = node_name
                node['sents'] = []
                for article in result['node_a'][node_name]:
                    temp = dict()
                    temp['article_title'] = article[0]
                    temp['sent'] = article[1]
                    temp['pmid'] = article[2]
                    node['sents'].append(temp)
                node['group'] = 1
                nodes.append(node)
        if 'node_b' in result:
            for node_name in result['node_b']:
                node = dict()
                node['name'] = node_name
                node['sents'] = []
                for article in result['node_b'][node_name]:
                    temp = dict()
                    temp['article_title'] = article[0]
                    temp['sent'] = article[1]
                    temp['pmid'] = article[2]
                    node['sents'].append(temp)
                node['group'] = 2
                nodes.append(node)
        # print json.dumps({'edges': result['edge'], 'nodes': nodes})
        return {'edges': result['edge'], 'nodes': nodes}

    def query_links(self,
                    type_a,
                    type_b,
                    relation_type,
                    num_edges=5,
                    num_pps=1):
        #type_a={'mesh':0, 'name':"Chemicals_and_Drugs"}
        try:
            type_a = ast.literal_eval(type_a)
            type_b = ast.literal_eval(type_b)
        except (ValueError, SyntaxError):
            pass  # already parsed, not string literals
        print "type_a = ", type_a
        print "type_b = ", type_b
        print "relation_type = ", relation_type
        self.db.query("set statement_timeout TO 0")
        #query_string = "SELECT * INTO " +self.identity+ " FROM (SELECT DISTINCT ON(entity_a,entity_b) entity_a,entity_b,sent_id  "+"FROM "+self.arg['relation_table']+" WHERE type_a_"+type_a['name']+"@>'"\
        #+type_a['type']+"' AND type_b_"+type_b['name']+"@>'"+ type_b['type']+"' AND relation_type='"+relation_type + "') x ORDER BY RANDOM() LIMIT " +str(num_edges)
        query_string_v2 = "SELECT * FROM (SELECT entity_a,entity_b,(array_agg('[' || article_id || ',' || sent_id || ']'))[1:" + str(num_pps+2) + "] as sents  "+"FROM "+self.arg['relation_table']+" WHERE type_a_"+type_a['name']+"@>'"\
        +type_a['type']+"' AND type_b_"+type_b['name']+"@>'"+ type_b['type']+"' AND relation_type='"+relation_type + "' GROUP BY entity_a,entity_b) x ORDER BY RANDOM() LIMIT " +str(num_edges)
        print query_string_v2
        q = self.db.query(query_string_v2)

        temp_time = time.time() - self.start_time
        self.start_time = time.time()
        # print "------query time cost = ", temp_time

        result = q.dictresult()
        # print "result length = ", len(result)
        query_a = []
        query_b = []
        query_edges = []
        for r in result:
            query_a.append(r['entity_a'])
            query_b.append(r['entity_b'])
            article = dict()
            for x in r['sents']:
                x = ast.literal_eval(x)
                article[x[0]] = str(x[1])
            query_edges.append({
                'source': r['entity_a'],
                'target': r['entity_b'],
                'sids': article.values(),
                'sents': []
            })

        red_node = dict()
        # print "len query_a = ", len(query_a), "; len query_b = ", len(query_b), "; len query_edges", len(query_edges)
        for v in query_a:
            #print "select distinct on (article_id) article_title, pmid, sent from entity_table where entity_name= '" + v['entity_a'] + "' LIMIT "+str(num_pps)
            #print "select distinct on (article_id) article_title, pmid, sent from entity_table where entity_name= '" + v['entity_a'] + "' LIMIT 2"
            tmp = self.db.query(
                "select article_title, pmid, sent from entity_table where entity_name= '"
                + v + "' LIMIT " + str(num_pps))
            #tmp=self.db.query("select distinct on (article_id) article_title, pmid, sent from entity_table where entity_name= '" + v['entity_a'] + "' LIMIT "+str(num_pps))
            red_node[v] = map(
                lambda x: (x['article_title'], x['sent'], x['pmid']),
                tmp.dictresult())
        #query_b=self.db.query("select entity_b from "+self.identity)
        blue_node = dict()
        for v in query_b:
            #pass
            #print "select distinct on (article_id) article_title, pmid, sent from entity_table where entity_name= '" + v['entity_b'] + "' LIMIT "+str(num_pps)
            tmp = self.db.query(
                "select article_title, pmid, sent from entity_table where entity_name= '"
                + v + "' LIMIT " + str(num_pps))
            #tmp=self.db.query("select distinct on (article_id) article_title, pmid, sent from entity_table where entity_name= '" + v['entity_b'] + "' LIMIT "+str(num_pps))
            blue_node[v] = map(
                lambda x: (x['article_title'], x['sent'], x['pmid']),
                tmp.dictresult())
        temp_time = time.time() - self.start_time
        self.start_time = time.time()
        # print "------query entity_a and entity_b time cost = ", temp_time

        #print red_node,blue_node
        for edge in query_edges:
            for s_id in edge['sids'][:num_pps]:
                tmp = self.db.query(
                    "select article_title, pmid, sent from entity_table where sent_id= '"
                    + s_id + "' LIMIT 1")
                edge['sents'].append(tmp.dictresult()[0])
        temp_time = time.time() - self.start_time
        self.start_time = time.time()
        # print "red_node nubmer = ", len(red_node), " blue_node number = ", len(blue_node), " edges number = ", len(query_edges)
        # print "------query edges time cost = ", temp_time

        #query_edge = "SELECT DISTINCT T.entity_a as source, T.entity_b as target, E.pmid, E.article_title,E.sent FROM " + self.arg['entity_table'] + " AS E INNER JOIN "+\
        #self.identity+" T ON E.sent_id = T.sent_id";
        #query_edges = "SELECT distinct on (R.article_id) R.article_id, R.sent_id from " + self.arg['relation_table']+ " AS R inner join " + self.identity+  \
        #" T on R.entity_a = T.entity_a and R.entity_b = T.entity_b and R.relation_type = \'" + relation_type +"' LIMIT " +str(num_pps)
        #"distinct on (article_id) inner join on entity_a, entity_b, relation_type"
        #q = self.db.query(query_edge)
        #self.db.query("drop table "+self.identity)
        return {'node_a': red_node, 'node_b': blue_node, 'edge': query_edges}
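
For orientation, a hedged usage sketch of query_links_v2 and the shape it returns; the instance name, category dicts, and relation type below are hypothetical placeholders rather than values from the original code.

# Hypothetical call; `service` stands for an instance of the enclosing class.
result = service.query_links_v2(
    type_a="{'name': 'Chemicals_and_Drugs', 'type': '{D02}'}",
    type_b="{'name': 'Diseases', 'type': '{C01}'}",
    relation_type='treats',
    entities_left='[]',
    entities_right='[]',
    num_edges=5,
    num_pps=1)
# result['nodes']: [{'name': ..., 'group': 1 or 2,
#                    'sents': [{'article_title': ..., 'sent': ..., 'pmid': ...}]}, ...]
# result['edges']: [{'source': ..., 'target': ..., 'sids': [...], 'sents': [...]}, ...]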
Exemplo n.º 25
class PostGreDBConnector:
    """PostGreDBConnector opens a PostGre DB connection. Different functions allow you to add, delete or update
    documents in PostGre DB."""

    def __init__(self):
        """Connecting to localhost (default host and port [localhost, 4532]) PostGre DB and initializing needed data
            base and tables."""
        try:
            print("Connecting to PostGre DB...")
            self.__db = DB(dbname='testdb', host='localhost', port=5432, user='******', passwd='superuser')
            print("PostGre DB connection successfully built.")
        except Exception:
            print("PostGre DB connection could not be built.")

        self.delete_all_data()
        self.drop_all_tables()

    def close_connection(self):
        self.__db.close()

    def create_schema(self, schema_name):
        self.__db.query("CREATE SCHEMA " + schema_name)
        self.__create_tables(schema_name)
        self.__create_functions(schema_name)

    def __create_tables(self, schema):
        """Create needed tables for RDF parsing."""
        schema += "."
        self._add_table("CREATE TABLE " + schema + "texts (id serial primary key, title text)")
        self._add_table(
            "CREATE TABLE " + schema + "bscale (id serial primary key, bscale text, nominal bool, ordinal bool, interval bool)")
        self._add_table("CREATE TABLE " + schema + "bsort (id serial primary key, bsort text)")
        self._add_table("CREATE TABLE " + schema + "pattern (id serial primary key, pattern text)")
        self._add_table("CREATE TABLE " + schema + "single_pattern (id serial primary key, single_pattern text)")
        self._add_table("CREATE TABLE " + schema + "snippets (id serial primary key, snippet text)")

        # relations
        self._add_table("CREATE TABLE " + schema + "has_attribute (bsort_id int, bscale_id integer[], aggregation int)")
        self._add_table("CREATE TABLE " + schema + "has_object (bscale_id int, pattern_id integer[], aggregation int)")
        self._add_table(
            "CREATE TABLE " + schema + "pattern_single_pattern (pattern_id int, single_pattern_id integer[], aggregation int)")
        self._add_table("CREATE TABLE " + schema + "texts_snippets (text_id int primary key, snippet_id integer[], aggregation int)")
        self._add_table(
            "CREATE TABLE " + schema + "snippet_offsets (id serial primary key,"
            " single_pattern_id int, snippet_id int, offsets integer[][], aggregation int)")

        # adjective and verb extractions
        self._add_table("CREATE TABLE " + schema + "subject_occ (id serial primary key, subject text, count int)")
        self._add_table("CREATE TABLE " + schema + "adjective_occ (id serial primary key, adjective text, count int)")
        self._add_table("CREATE TABLE " + schema + "verb_occ (id serial primary key, verb text, count int)")
        self._add_table("CREATE TABLE " + schema + "object_occ (id serial primary key, object text, count int)")
        self._add_table("CREATE TABLE " + schema + "subject_adjective_occ (id serial primary key, subject int, adjective int, count int, pmi float)")
        self._add_table("CREATE TABLE " + schema + "subject_object_occ (id serial primary key, subject int, object int, count int, pmi float)")
        self._add_table("CREATE TABLE " + schema + "object_verb_occ (id serial primary key, object int, verb int, count int, pmi float)")
        self._add_table("CREATE TABLE " + schema + "subject_verb_occ (id serial primary key, subject int, verb int, count int, pmi float)")

        # correlating pattern
        self._add_table("CREATE TABLE " + schema + "bscale_single_pattern (id serial primary key, bscale_id int, single_pattern_id int, single_pattern text, count int)")
        self._add_table(
            "CREATE TABLE " + schema + "correlating_pattern (id serial primary key, pattern_a int, pattern_b int, count int, pmi float)")

    def __create_functions(self, schema):
        """Create all necessary functions to aggregate the results saved in the database."""
        schema += "."
        self.add_function(schema + "aggregate_texts_snippets", "SELECT text_id, array_length(snippet_id, 1) FROM " + schema + "texts_snippets")
        self.add_function(schema + "aggregate_snippet_offsets", "SELECT id, array_length(offsets, 1) FROM " + schema + "snippet_offsets")

    def add_function(self, name, function):
        """Create a new function in the db."""
        create_function = "CREATE FUNCTION "
        returns = "() RETURNS SETOF RECORD AS "
        lang = " LANGUAGE SQL"
        query = create_function + name + returns + add_quotes(function) + lang
        self.__db.query(query)

    def _add_table(self, query):
        """Create a new table with a query."""
        self.__db.query(query)

    def add_table(self, schema, name, rows):
        """Create a new table with a name and rows given in query form."""
        create_table = "CREATE TABLE "
        query = create_table + schema + "." + name + rows
        self.__db.query(query)

    def insert(self, schema, table, row):
        """Insert a new row element into a specified table."""
        return self.__db.insert(schema + "." + table, row)

    def is_in_table(self, schema, table, where_clause):
        """Returns whether a row already exists in a table or not."""
        select = "SELECT * FROM "
        where = " WHERE "
        q = select + schema + "." + table + where + where_clause
        result = self.__db.query(q).dictresult()
        if len(result) > 0:
            return True
        else:
            return False

    def update(self, schema, table, values, where_clause):
        """Update an entry in a specified table."""
        UPDATE = "UPDATE "
        SET = " SET "
        WHERE = " WHERE "
        query = UPDATE + schema + "." + table + SET + values + WHERE + where_clause
        self.query(query)

    def get(self, schema, table, where_clause, key):
        """Return the key of a specific item in a table."""
        select = "SELECT "
        _from = " FROM "
        where = " WHERE "
        q = select + key + _from + schema + "." + table + where + where_clause
        result = self.__db.query(q).dictresult()
        if len(result) > 0:
            return result[0][key]
        else:
            return None

    def get_data_from_table(self, schema, table):
        """Gets all data available in a specific table."""
        return self.__db.query("SELECT * FROM " + schema + "." + table).dictresult()

    def get_id(self, schema, table, where_clause):
        """Return the id of an item in a table. If found return id number of found item, else None."""
        select = "SELECT id FROM "
        where = " WHERE "
        q = select + schema + "." + table + where + where_clause
        result = self.__db.query(q).dictresult()
        if len(result) > 0:
            return result[0]['id']
        else:
            return None

    def delete_from_table(self, schema, table, row):
        """Delete a row element form a specific table."""
        return self.__db.delete(schema + "." + table, row)

    def delete_data_in_table(self, schema, table):
        """Delete all data in a specific table."""
        self.__db.truncate(schema + "." + table, restart=True, cascade=True, only=False)

    def delete_all_data(self):
        """Deletes all data from all existing tables."""
        tables = self.get_tables()
        for table in tables:
            table_name = str(table)
            self.__db.truncate(table_name, restart=True, cascade=True, only=False)

    def get_tables(self):
        """Get all available tables in the database."""
        return self.__db.get_tables()

    def get_attributes(self, schema, table):
        """Get all attributes of a specified table."""
        return self.__db.get_attnames(schema + "." + table)

    def drop_table(self, schema, table):
        """Drops a specified table."""
        query = "DROP TABLE "
        self.__db.query(query + schema + "." + table)

    def drop_all_tables(self):
        """Drops all existing tables."""
        tables = self.get_tables()
        table_names = ""
        if len(tables) > 0:
            for ind, table in enumerate(tables):
                if ind == 0:
                    table_names = str(table)
                else:
                    table_names = table_names + ", " + str(table)
            self.__db.query("DROP TABLE " + table_names)
        else:
            print("Nothing to delete.")

    def get_all(self, schema, table, attribute):
        """Gets one or more attributes of all entries from a specified table."""
        select = "SELECT "
        _from = " FROM "
        query = select + attribute + _from + schema + "." + table
        return self.__db.query(query).dictresult()

    def query(self, query):
        """Sends a query to the database."""
        result = self.__db.query(query)
        if result is not None:
            if not isinstance(result, str):
                return result.dictresult()
        else:
            return result
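
A brief, hedged usage sketch for PostGreDBConnector; it assumes the hard-coded local connection in __init__ succeeds, and the schema name and row below are hypothetical.

# Hypothetical usage; requires the local PostGre DB configured in __init__.
connector = PostGreDBConnector()
connector.create_schema('rdf_demo')
connector.insert('rdf_demo', 'texts', {'title': 'sample document'})
if connector.is_in_table('rdf_demo', 'texts', "title = 'sample document'"):
    print(connector.get_data_from_table('rdf_demo', 'texts'))
connector.close_connection()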
Exemplo n.º 26
            WHERE date_part(\'month\'::text, new.start_time) = ' + m + '::double precision AND date_part(\'year\'::text, new.start_time) = ' + str(
            year
        ) + '::double precision DO INSTEAD  INSERT INTO scoot.agg_15_' + str(
            year
        ) + m + ' (detector, start_time, end_time, flow_mean, occ_mean, vehicle_occ_mean, lpu_factor_mean)\
            VALUES (new.detector, new.start_time, new.end_time, new.flow_mean, new.occ_mean, new.vehicle_occ_mean, new.lpu_factor_mean)'

        q = 'CREATE TABLE scoot.agg_15_' + str(year) + m + '(detector text, \
        start_time timestamp without time zone, end_time timestamp without time zone, \
        flow_mean int, occ_mean double precision, vehicle_occ_mean int, \
        LPU_factor_mean double precision, \
        CONSTRAINT c' + str(
            year
        ) + m + ' CHECK (date_part(\'month\'::text, start_time) = ' + m + '::double precision AND date_part(\'year\'::text, start_time) = ' + str(
            year) + '::double precision))'
        db.query(q)
        db.query(r)
        print('Table Created')
        db.inserttable('scoot.agg_15_' + str(year) + m, sdata)
        print('Table Inserted')

# Insert outflowing data from a month earlier than start_month
if not sdata_prev.empty:
    if month == 1:
        year = year - 1
        m = '12'
    else:
        if (month - 1) < 10:
            m = '0' + str(month - 1)
        else:
            m = str(month - 1)
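
For clarity, a quick hedged sketch of what the rollover above yields, assuming month holds the 1-indexed calendar month of start_month:

# month == 1  -> year is decremented and m == '12'
# month == 7  -> m == '06'
# month == 10 -> m == '09'; month == 11 -> m == '10'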
Exemplo n.º 27
class TPCDS:

    STORAGE = [
        ("small_storage", "appendonly=true, orientation=column"),
        ("medium_storage", "appendonly=true, orientation=column, compresstype=zstd"),
        ("large_storage", "appendonly=true, orientation=column, compresstype=zstd"),
        ("e9_medium_storage", "appendonly=true, compresstype=zstd"),
        ("e9_large_storage", "appendonly=true, orientation=column, compresstype=zstd")
    ]

    RAW_DATA_PATH = "/data/generated_source_data/data"

    def __init__(self, info_dir, dbname, port, host, data_path):
        self.info_dir = info_dir
        self.db = DB(dbname=dbname, port=port, host=host)
        self.dist_info = self.parse_dist_info()
        self.data_path = data_path

    def create_schema(self):
        sqls = [
            "DROP SCHEMA IF EXISTS tpcds CASCADE;",
            "DROP SCHEMA IF EXISTS ext_tpcds CASCADE;",
            "CREATE SCHEMA tpcds;",
            "CREATE SCHEMA ext_tpcds;"
        ]
        for sql in sqls:
            self.db.query(sql)

    def create_table(self):
        ddl_top_path = os.path.join(self.info_dir, "ddl")
        print("creating norm tables...")
        for fn in os.listdir(ddl_top_path):
            if not fn.endswith(".sql"): continue
            if fn == "000.e9.tpcds.sql": continue
            if "ext_" in fn: continue
            self.create_normal_table(os.path.join(ddl_top_path, fn))
        print("norm tables created.")

        print("creating ext tables...")        
        for fn in os.listdir(ddl_top_path):
            if not fn.endswith(".sql"): continue
            if fn == "000.e9.tpcds.sql": continue
            if "ext_" not in fn: continue
            self.create_ext_table(os.path.join(ddl_top_path, fn))
        print("ext tables created.")

    def create_normal_table(self, ddlpath):
        assert("ext_" not in ddlpath)
        with open(ddlpath) as f:
            sql = f.read().lower()
        tabname = self.get_tabname_from_path(ddlpath)
        sql = self.patch_dist_info(sql, tabname)
        sql = self.patch_storage_info(sql)
        self.db.query(sql)

    def create_ext_table(self, ddlpath):
        assert("ext_" in ddlpath)
        with open(ddlpath) as f:
            sql = f.read().lower()
        tabname = self.get_tabname_from_path(ddlpath)
        sql = self.patch_gpfdist_local(sql, tabname)
        self.db.query(sql)

    def patch_dist_info(self, sql, tabname):
        distkeys = self.dist_info[tabname]
        sql = sql.replace(":distributed_by",
                          "distributed by (%s)" % distkeys)
        return sql

    def patch_storage_info(self, sql):
        for storage_type, storage_option in self.STORAGE:
            replace_key = ":" + storage_type
            sql = sql.replace(replace_key, storage_option)
        return sql

    def patch_gpfdist_local(self, sql, tabname):
        url = "'gpfdist://mdw:2223/%s*.dat'" % tabname
        sql = sql.replace(":location", url)
        return sql
    
    def get_tabname_from_path(self, path):
        filename = os.path.basename(path)
        return filename.split(".")[-2]

    def parse_dist_info(self):
        dist_info = {}
        with open(os.path.join(self.info_dir, "ddl", "distribution.txt")) as f:
            for line in f:
                _, tabname, distkeys = line.strip().split("|")
                dist_info[tabname] = distkeys
        return dist_info

    def start_gpfdist(self, port, logfile):
        cmd = ["gpfdist",
               "-p", str(port),
               "-l", logfile]
        proc = subprocess.Popen(cmd)
        sleep(3)
        return proc

    def load_all_tables(self):
        ddl_top_path = os.path.join(self.info_dir, "ddl")
        start_time = time()
        print("load tables...")
        print("===================================")
        for fn in os.listdir(ddl_top_path):
            if not fn.endswith(".sql"): continue
            if fn == "000.e9.tpcds.sql": continue
            if "ext_" in fn: continue
            self.load_table(os.path.join(ddl_top_path, fn))
        end_time = time()
        cost = end_time - start_time
        print("all tables finished in %s seconds" % cost)

    def load_table(self, ddlpath):
        tabname = self.get_tabname_from_path(ddlpath)
        tab_data_path = self.get_tab_data_path(tabname)
        os.chdir(tab_data_path)
        proc = self.start_gpfdist("2223", "/data/gpfdist.log")
        sql = ("insert into tpcds.%s "
               "select * from ext_tpcds.%s") % (tabname, tabname)
        start_time = time()
        self.db.query(sql)
        end_time = time()
        proc.terminate()
        proc.wait()
        cost = end_time - start_time
        print("load %s cost time %s seconds" % (tabname, cost))
        print("===================================")

    def get_tab_data_path(self, tabname):
        return os.path.join(self.data_path, tabname)

    def move_all_tables(self):
        ddl_top_path = os.path.join(self.info_dir, "ddl")
        for fn in os.listdir(ddl_top_path):
            if not fn.endswith(".sql"): continue
            if fn == "000.e9.tpcds.sql": continue
            if "ext_" in fn: continue
            self.move_table(os.path.join(ddl_top_path, fn))

    def move_table(self, ddlpath):
        tabname = self.get_tabname_from_path(ddlpath)
        fns = self.findall_tab_data(tabname)
        newplace = os.path.join(self.data_path, tabname)
        os.makedirs(newplace, exist_ok=True)
        for fn in fns:
            shutil.move(fn, newplace)
        print("moving %s (%d dat files)" % (tabname, len(fns)))

    def findall_tab_data(self, tabname):
        data_fns = []
        pt_regstr = tabname + r"_\d+_\d+.dat"
        pt = re.compile(pt_regstr)
        for fn in os.listdir(self.RAW_DATA_PATH):
            if pt.search(fn):
                data_fns.append(os.path.join(self.RAW_DATA_PATH, fn))
        return data_fns

    def close_db(self):
        self.db.close()
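
A hedged driver sketch for the TPCDS helper above; the info directory, database name, host, port, and data path are placeholders, not values from the original script.

# Hypothetical driver; all paths and connection values are placeholders.
tpcds = TPCDS(info_dir='/data/tpcds_info', dbname='tpcds', port=5432,
              host='mdw', data_path='/data/tpcds_data')
tpcds.create_schema()      # recreate the tpcds and ext_tpcds schemas
tpcds.create_table()       # normal tables first, then the gpfdist external tables
tpcds.move_all_tables()    # stage the generated *.dat files per table
tpcds.load_all_tables()    # load tpcds.* from ext_tpcds.* via gpfdist
tpcds.close_db()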
Exemplo n.º 28
                name = 'st_asgeojson(wkb_geometry)as geometry'
            cols+=name+','
    cols = cols.rstrip(',')
    cols += ' from %s ) tb;' % tbname
    return cols

#pgtbs=pgcnn.query("select tablename from pg_tables where schemaname='public' and tablename like 'bs_%_layer' order by tablename;")
#tbrows=pgtbs.namedresult()
tbrows=['pipe']
for row in tbrows:
    #tbname=row.tablename
    tbname=row
    print(tbname)
    sql = genSql(tbname)
    print(sql)
    dts=pgcnn.query(sql).namedresult()
    c = 0
    d = 0
    w = open('pipe.json','w')
    for dt in dts:
        dt.json['xdwz']=''
        dt.json['sccj']=''
        dt.json['sjdw']=''
        dt.json['sgdw']=''
        dt.json['wxyy']=''
        dt.json['fsfs']=u"直埋"
        dt.json['fssd']=1.5
        dt.json['bwcl']=u"聚氨酯"
        dt.json['whklx']=u"聚乙烯"
        dt.json['bz']="20180920"
        dt.json['zw_type']='597'
Exemplo n.º 29
import sys
fs_encoding = sys.getfilesystemencoding()
print(fs_encoding)

db=DB(dbname='postgres',host='localhost',port=5432,user='******',passwd='postgres')

xl=xlrd.open_workbook(u'电大所-海银御景站-管网.xls')
#xl=xlrd.open_workbook('/home/bblu/repo/algopython/database/pipe.xls')
sheetname = xl.sheet_names()
xl_sheet = xl.sheet_by_index(0)
print xl_sheet.name
xl_sheet1 = xl.sheet_by_name(u'Sheet1')
print xl_sheet1.name
fileds=""
for col in range(0,xl_sheet.ncols):
    fileds+=xl_sheet.cell(0,col).value+','
print(fileds)

for row in range(1,xl_sheet.nrows):
    values=""
    # pick the two or three essential fields here to check which column they fall in
    #values+= xl_sheet.cell(row,col).value + ','
    for col in range(0,xl_sheet.ncols):
        values+= unicode(xl_sheet.cell(row,col).value) + ','
    #print(values)
    # strip the trailing comma
    values = values[:-1]
    sql = 'insert into dwzy.table (sbid,equip) values(%s)'%values
    db.query(sql)
Exemplo n.º 30
from pg import DB
     
db=DB(dbname='gis',host='localhost',port=5432,user='******',passwd='pswd')
# 1.drop tables begin with 't_'
q=db.query("SELECT tablename FROM  pg_tables WHERE schemaname='public' and tablename LIKE 't\_%'")
rows=q.namedresult()
c=0
for row in rows:
        tbname=row.tablename
        alt='ALTER TABLE %s ADD COLUMN' % tbname
        sql='%s edittype_update_ integer' % alt
        sql='%s; %s edittime_update_ timestamp with time zone'%(sql,alt)
        sql='%s; %s editversionid_update_ bigint;'%(sql,alt)
        
        db.query(sql)
        c=c+1
        print('%d : %s' %(c,sql))
        
db.close()
# -*- coding: utf-8 -*-
"""
insert data to PostgreSQL

@author: Shih-Chi
"""

from pg import DB
import numpy as np
import datetime

db = DB(dbname='Pingtung', host='localhost', port=5432,
    user='******', passwd='husky')

sites = db.query('select site from "Well_Stations"').getresult()
for site in sites:
    
    mindate = db.query('select MIN(date),MAX(date) from "Well" where site like \'{site}\''.format(site = site[0])).getresult()
    if mindate[0][0] is not None:
        print('{site}: date from {ys}/{ms}/{ds} to {ye}/{me}/{de}'.format(site=site[0],ys=mindate[0][0].year,ms=mindate[0][0].month,ds=mindate[0][0].day,ye=mindate[0][1].year,me=mindate[0][1].month,de=mindate[0][1].day))
        db.query('UPDATE "Well_Stations" SET starttime = \'{ys}/{ms}/{ds}\', endtime = \'{ye}/{me}/{de}\' WHERE site like \'{site}\''.format(site=site[0],ys=mindate[0][0].year,ms=mindate[0][0].month,ds=mindate[0][0].day,ye=mindate[0][1].year,me=mindate[0][1].month,de=mindate[0][1].day))
Exemplo n.º 32
import os
from pg import DB

db=DB(dbname='db_meta',host='172.16.1.10',port=5432,user='******',passwd='password')
# 1.drop test tmp tables
q=db.query("SELECT sys_tablename AS tablename  FROM sys_component where sys_tablename like 't\_%' order by sys_tablename;")

rows=q.namedresult()
i=0
for row in rows:
    tbname=row.tablename
    cmd='pg_dump -h 172.16.1.10 -U postgres -F p -n public -s -t %s dbName >> D:/pgdata/db_meta_struct.sql' % tbname
    os.system(cmd)
    i=i+1
    print('export table: %s ,%d' % (tbname,i))
sites = get_well_list(eventtime)

for site in sites:
    time, data = get_well_data(site, eventtime)
    
    fig = figure(figsize=(12, 8), dpi=300)
    
    title('{site} ({ST}~{ET})'.format(site = site, ST = str(eventtime[0]), ET = str(eventtime[1])))
    plot(time, data, color='b', linewidth=1.5)
    fig.savefig(os.path.join(newpath, 'well\{site}.png'.format(site = site)), dpi=200, bbox_inches='tight')
    print(site)
    close('all')


sites = get_P_list(eventtime)

for site in sites:
    time, data = get_P_data(site, eventtime)
    
    siteXY = db.query('select "TWD97_X", "TWD97_Y" from "Precipitation_Stations" where site = \'{site}\''.format(site = site)).getresult()
    file.write('{site}, {s}, {x}, {y}\n'.format(site=site, s=np.sum(data), x=float(siteXY[0][0]), y=float(siteXY[0][1])))
    
    fig = figure(figsize=(12, 8), dpi=300)
    
    title('{site} ({ST}~{ET})'.format(site = site, ST = str(eventtime[0]), ET = str(eventtime[1])))
    step(time, data, color='b', linewidth=1.5, where='mid')
    fig.savefig(os.path.join(newpath, 'P\{site}.png'.format(site = site)), dpi=200, bbox_inches='tight')
    print(site)
    close('all')

file.close()
Exemplo n.º 34
from itertools import chain
from suds.client import Client
from pg import DB
import time

reload(sys)
sys.setdefaultencoding("utf8")
start_time = time.time()
url = "http://www.marinespecies.org/aphia.php?p=soap&wsdl=1"
client = Client(url)
db = DB(dbname="gbif", host="localhost", port=5432, user="******", passwd="postgres")

offset = 0
while True:

	names = db.query("select name from gbif.names where match_type is null limit 50 offset " + str(offset)).getresult()
	names = list(chain(*names))

	if (len(names) == 0):
			break

	scientificnames = client.factory.create("scientificnames")
	scientificnames.item = names
	scientificnames._arrayType = "xsd:string[]"

	try:
		results = client.service.matchAphiaRecordsByNames(scientificnames, True, True, False)
	except:
		e = sys.exc_info()[0]
		print "Error: " + str(e)
		results = []
Exemplo n.º 35
    def setUp(self):
        """Setup test tables or empty them if they already exist."""
        db = DB(dbname=dbname, host=dbhost, port=dbport)
        db.query("set datestyle to 'iso'")
        db.query("set default_with_oids=false")
        db.query("set standard_conforming_strings=false")
        db.query("set client_min_messages=warning")
        db.query("drop table if exists fruits cascade")
        db.query("create table fruits(id serial primary key, name varchar)")
        self.db = db
Exemplo n.º 36
File: runQD.py Project: Geof23/QFP
# extract params

test1 = sys.argv[1]
test2 = sys.argv[2]
tpath1 = "test1"
tpath2 = "test2"

# read data from db
# these are the params we need for build
# comp1, comp2, flags1, flags2
# these are the params we need for doDiffGdb.py:
# tname prec1 prec2 sort1 sort2

db = DB(dbname="qfp", host="localhost", user="******", passwd="qfp123")
q1 = db.query("select * from tests where index  = " + test1)
q2 = db.query("select * from tests where index  = " + test2)

d1 = q1.dictresult()[0]
print("queried for test 1, found? " + str(len(d1) > 0))
comp1 = d1["compiler"]
flags1 = d1["switches"]
prec1 = d1["precision"]
tname = d1["name"]
sort1 = d1["sort"]

d2 = q2.dictresult()[0]
print("queried for test 2, found? " + str(len(d2) > 0))
comp2 = d2["compiler"]
flags2 = d2["switches"]
prec2 = d2["precision"]
# -*- coding: utf-8 -*-
"""
insert data to PostgreSQL

@author: Shih-Chi
"""

from pg import DB
import numpy as np
import datetime

db = DB(dbname='Pingtung', host='localhost', port=5432,
    user='******', passwd='husky')


db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'中正(1)\'');
db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'石化(1)\'');
db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'林園(1)\'');
db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'潮寮(1)\'');
db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'永芳(1)\'');
db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'昭明(1)\'');
db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'溪埔(1)\'');
db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'九曲(1)\'');
db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'大樹(1)\'');
db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'旗山(1)\'');
db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'美濃(1)\'');
db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'旗美(1)\'');
db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'新威(1)\'');
db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'清溪(1)\'');
db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'海豐(1)\'');
db.query('UPDATE "Well_Stations" SET layer = 1 WHERE name like \'前進\'');
Exemplo n.º 38
            "../old-hp-data/log20110713.txt",
            "../old-hp-data/log20110728.txt",
            "../old-hp-data/log20110801.txt",
            "../old-hp-data/log20110816.txt",
            "../old-hp-data/log20110825.txt",
            "../old-hp-data/log20110903.txt",
            "../old-hp-data/log20110913.txt",
            "../old-hp-data/log20111014.txt",
            "../old-hp-data/log20111024.txt",
            "../old-hp-data/log20111106.txt",
            "../old-hp-data/log20111128.txt",
            ]
    honeypotName = 'HoneypotOld'
    port = 22

    sessionDatabaseID = DB.query(db, "SELECT MAX(session) FROM connections WHERE honeypot='" + honeypotName + "'").getresult()[0][0]
    if not sessionDatabaseID:
        sessionDatabaseID = 1
    sessionOld = -1
    
    ip = ''
    sessionStartTime = ''
    sessionEndTime = ''
    user = ''
    password = ''
    for fileName in fileNames:
        for line in input(fileName):
            if len(line) < 20:
                continue
            
            if "Session" in line:
Exemplo n.º 39
		copy mysql_dwh_staging.orders from 's3://troy-de-course/orders/current/orders.csv'
		iam_role 'arn:aws:iam::387932593219:role/myredshift'
		CSV QUOTE '\"' DELIMITER ','
		acceptinvchars;

		delete from mysql_dwh.orders
		using mysql_dwh_staging.orders
		where mysql_dwh.orders.order_id = mysql_dwh_staging.orders.order_id;
		
		insert into mysql_dwh.orders select * from mysql_dwh_staging.orders;

		truncate table mysql_dwh_staging.orders;
	end;
"""

result = db.query(merge_qry)
print(result)

# import boto3,json
# from pg import DB

# secret_name = 'your-secret-name'
# region_name ='eu-west-1'

# session = boto3.session.Session()

# client = session.client(service_name='secretsmanager',region_name=region_name)

# get_secret_value_response = client.get_secret_value(SecretId=secret_name)

# creds = json.loads(get_secret_value_response['SecretString'])
Exemplo n.º 40
for srv in args.dst:
    item = cfg[srv]
    print("")
    print(
        "------------------------------------------------------------------------------------"
    )
    print("Destination: " + str(srv))
    print(
        "------------------------------------------------------------------------------------"
    )
    dstdb = DB(dbname=item["db"],
               host=item["host"],
               port=int(item["port"]),
               user=item["user"],
               passwd=item["password"])

    for table in tables:
        dstdb.begin()
        rows = srcdb.query('SELECT * FROM %s' % table).getresult()
        dstdb.query(
            'CREATE TEMPORARY TABLE newvals ON COMMIT DROP AS TABLE %s WITH NO DATA'
            % table)
        dstdb.inserttable('newvals', rows)
        dstdb.query('LOCK TABLE %s IN EXCLUSIVE MODE' % table)
        print(upd.get(table))
        dstdb.query(upd.get(table))
        print(insert.get(table))
        dstdb.query(insert.get(table))
        dstdb.commit()
Exemplo n.º 41
    global pgcnn
    sql = "select column_name,data_type from information_schema.columns where table_schema='public' and table_name = '%s'" % tbname
    columns = pgcnn.query(sql).namedresult()
    cols='select row_to_json(tb) as json from (select '
    for tup in columns:
        name = tup.column_name
        if(name!='zscale'):
            if(name == 'geometry'):
                name = 'st_asgeojson(st_transform(geometry,4326))as geometry'
            elif(name == 'wkb_geometry'):
                name = 'st_asgeojson(st_transform(wkb_geometry,4326))as geometry'
            cols+=name+','
    cols = cols.rstrip(',')
    cols += ' from %s ) tb;' % tbname
    return cols
pgtbs=pgcnn.query("select tablename from pg_tables where schemaname='public' and tablename like '%_layer' order by tablename;")
tbrows=pgtbs.namedresult()
#tbrows=['road_layer']
for row in tbrows:
    tbname=row.tablename
    print(tbname)
    sql = genSql(tbname)
    print(sql)
    dts=pgcnn.query(sql).namedresult()
    c = 0
    d = 0
    for dt in dts:
        dt.json['device_table']='public.'+tbname
        geo = dt.json['geometry']
        if geo:
            dt.json['geometry']=json.loads(geo)
Exemplo n.º 42
            pts += cds
        else:
            for sub in cds:
                pts += sub
    print item_id,len(pts),
    if len(pts)>2:
        hull = Convex(pts)
        idxs = hull.vertices
        #idxs.append(idxs[0])
        print len(hull.vertices),hull.vertices
        drawHull(pts,idxs)
        wo(item_id,pts,idxs)
    elif len(pts)==2:
        print '*****************************'
        wb(item_id,pts)

pgtbs=pgcnn.query('select distinct item_id from equ_xl_dev order by item_id limit 1;')

for row in pgtbs.namedresult():
    #genSql(row.item_id)
    genSql("1616G011000R03")
    break

# test
if __name__ == '__main__':
    ps = [[2, 2],[1, 1],[2, 1],[1.5, 1.5],[1, 2],[3, 1.5],[1.5, 1.2],[0.5, 2],[1.5, 0.5]]

    #[{'y': 0.5, 'x': 1.5}, {'y': 1.5, 'x': 3}, {'y': 2, 'x': 2}, {'y': 2, 'x': 1}, {'y': 2, 'x': 0.5}, {'y': 1, 'x': 1}]
    #print Convex.graham_scan(ps)

Exemplo n.º 43
class SteadyPgConnection:
    """Class representing steady connections to a PostgreSQL database.

    Underlying the connection is a classic PyGreSQL pg API database
    connection which is reset if the connection is lost or used too often.
    Thus the resulting connection is steadier ("tough and self-healing").

    If you want the connection to be persistent in a threaded environment,
    then you should not deal with this class directly, but use either the
    PooledPg module or the PersistentPg module to get the connections.

    """

    version = __version__

    def __init__(self, maxusage=None, setsession=None, closeable=True,
            *args, **kwargs):
        """Create a "tough" PostgreSQL connection.

        maxusage: maximum usage limit for the underlying PyGreSQL connection
            (number of uses, 0 or None means unlimited usage)
            When this limit is reached, the connection is automatically reset.
        setsession: optional list of SQL commands that may serve to prepare
            the session, e.g. ["set datestyle to ...", "set time zone ..."]
        closeable: if this is set to false, then closing the connection will
            be silently ignored, but by default the connection can be closed
        args, kwargs: the parameters that shall be used to establish
            the PostgreSQL connections with PyGreSQL using pg.DB()

        """
        # basic initialization to make finalizer work
        self._con = None
        self._closed = True
        # proper initialization of the connection
        if maxusage is None:
            maxusage = 0
        if not isinstance(maxusage, int):
            raise TypeError("'maxusage' must be an integer value.")
        self._maxusage = maxusage
        self._setsession_sql = setsession
        self._closeable = closeable
        self._con = PgConnection(*args, **kwargs)
        self._transaction = False
        self._closed = False
        self._setsession()
        self._usage = 0

    def _setsession(self):
        """Execute the SQL commands for session preparation."""
        if self._setsession_sql:
            for sql in self._setsession_sql:
                self._con.query(sql)

    def _close(self):
        """Close the tough connection.

        You can always close a tough connection with this method
        and it will not complain if you close it more than once.

        """
        if not self._closed:
            try:
                self._con.close()
            except Exception:
                pass
            self._transaction = False
            self._closed = True

    def close(self):
        """Close the tough connection.

        You are allowed to close a tough connection by default
        and it will not complain if you close it more than once.

        You can disallow closing connections by setting
        the closeable parameter to something false. In this case,
        closing tough connections will be silently ignored.

        """
        if self._closeable:
            self._close()
        elif self._transaction:
            self.reset()

    def reopen(self):
        """Reopen the tough connection.

        It will not complain if the connection cannot be reopened.

        """
        try:
            self._con.reopen()
        except Exception:
            if self._transaction:
                self._transaction = False
                try:
                    self._con.query('rollback')
                except Exception:
                    pass
        else:
            self._transaction = False
            self._closed = False
            self._setsession()
            self._usage = 0

    def reset(self):
        """Reset the tough connection.

        If a reset is not possible, tries to reopen the connection.
        It will not complain if the connection is already closed.

        """
        try:
            self._con.reset()
            self._transaction = False
            self._setsession()
            self._usage = 0
        except Exception:
            try:
                self.reopen()
            except Exception:
                try:
                    self.rollback()
                except Exception:
                    pass

    def begin(self, sql=None):
        """Begin a transaction."""
        self._transaction = True
        try:
            begin = self._con.begin
        except AttributeError:
            return self._con.query(sql or 'begin')
        else:
            # use existing method if available
            if sql:
                return begin(sql=sql)
            else:
                return begin()

    def end(self, sql=None):
        """Commit the current transaction."""
        self._transaction = False
        try:
            end = self._con.end
        except AttributeError:
            return self._con.query(sql or 'end')
        else:
            if sql:
                return end(sql=sql)
            else:
                return end()

    def commit(self, sql=None):
        """Commit the current transaction."""
        self._transaction = False
        try:
            commit = self._con.commit
        except AttributeError:
            return self._con.query(sql or 'commit')
        else:
            if sql:
                return commit(sql=sql)
            else:
                return commit()

    def rollback(self, sql=None):
        """Rollback the current transaction."""
        self._transaction = False
        try:
            rollback = self._con.rollback
        except AttributeError:
            return self._con.query(sql or 'rollback')
        else:
            if sql:
                return rollback(sql=sql)
            else:
                return rollback()

    def _get_tough_method(self, method):
        """Return a "tough" version of a connection class method.

        The tough version checks whether the connection is bad (lost)
        and automatically and transparently tries to reset the connection
        if this is the case (for instance, the database has been restarted).

        """
        def tough_method(*args, **kwargs):
            transaction = self._transaction
            if not transaction:
                try: # check whether connection status is bad
                    if not self._con.db.status:
                        raise AttributeError
                    if self._maxusage: # or connection used too often
                        if self._usage >= self._maxusage:
                            raise AttributeError
                except Exception:
                    self.reset() # then reset the connection
            try:
                result = method(*args, **kwargs) # try connection method
            except Exception: # error in query
                if transaction: # inside a transaction
                    self._transaction = False
                    raise # propagate the error
                elif self._con.db.status: # if it was not a connection problem
                    raise # then propagate the error
                else: # otherwise
                    self.reset() # reset the connection
                    result = method(*args, **kwargs) # and try one more time
            self._usage += 1
            return result
        return tough_method

    def __getattr__(self, name):
        """Inherit the members of the standard connection class.

        Some methods are made "tougher" than in the standard version.

        """
        if self._con:
            attr = getattr(self._con, name)
            if (name in ('query', 'get', 'insert', 'update', 'delete')
                    or name.startswith('get_')):
                attr = self._get_tough_method(attr)
            return attr
        else:
            raise InvalidConnection

    def __del__(self):
        """Delete the steady connection."""
        try:
            self._close() # make sure the connection is closed
        except Exception:
            pass
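
A short, hedged usage sketch for SteadyPgConnection; per the constructor docstring the extra keyword arguments are forwarded to pg.DB(), and every connection value below is a placeholder.

# Placeholder parameters throughout; kwargs are forwarded to pg.DB().
con = SteadyPgConnection(maxusage=1000,
                         setsession=["set datestyle to 'iso'"],
                         dbname='mydb', host='localhost',
                         user='me', passwd='secret')
# query() is wrapped by _get_tough_method(), so outside a transaction a lost
# connection is reset transparently and the query is retried once.
print(con.query('select version()').getresult())
con.close()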
Exemplo n.º 44
            self.country = 0
    def handle_endtag(self, tag):
        if tag == "th":
            self.start = 0
    def handle_data(self, data):
        if self.start and data == "Country:":
            self.country = 1
        elif self.found :
            self.data.append(data)
            self.found = 0

if __name__ == '__main__':
    db = DB(dbname='honeypotSSH', host='localhost', user='******', passwd='1')
    while True:
        try:
            ip = DB.query(db, "SELECT ip from connections where ip <>'' and country is NULL LIMIT 1").getresult()[0][0]
            print ip
        except Exception:
            exit(1)
        try:
            country = DB.query(db, "SELECT country from connections where ip = '" + ip + "' and country is not NULL LIMIT 1").getresult()[0][0]
        except IndexError:
            country = 'Not Found'
        print "Local: " + country
        if ip.startswith('192.168.') or ip.startswith('10.'):
            country = 'Private'
            print "Private"
        if ip == '127.0.0.1':
            country = 'Loopback'
            print "Loopback"
        if country == 'Not Found':
Exemplo n.º 45
class BotDB:
    def __init__(self, db_url):
        urlparse.uses_netloc.append("postgres")
        self.__db_url = db_url
        url = urlparse.urlparse(db_url)
        self.__db = DB(
            dbname=url.path[1:],
            user=url.username,
            passwd=url.password,
            host=url.hostname,
            port=url.port
        )

    def insertThesis(self, init_id, chat_id, user_id, body):
        ts = time.time()
        timestamp = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
        print("inserting thesis")
        print(init_id, chat_id, user_id, body, timestamp)
        self.__db.insert("theses", row={"init_id": init_id, "chat_id": chat_id, "user_id": user_id, "body": body})
        print("done")
        self.__db.commit()

    def getThesisByIds(self, init_id, chat_id):
        query = self.__db.query("SELECT * FROM theses WHERE init_id = %d AND chat_id = %d;" % (init_id, chat_id))
        dict_res = query.dictresult()
        if len(dict_res) == 0:
            return False
        else:
            return dict_res[0]

    def getThesisByBody(self, body):
        query = self.__db.query("SELECT * FROM theses WHERE body = '%s';" % body)
        dict_res = query.dictresult()
        if len(dict_res) == 0:
            return False
        else:
            return dict_res[0]

    def getLastThesesByTime(self, chat_id, interval):
        query = self.__db.query("SELECT * FROM theses WHERE chat_id = %s AND creation_time > current_timestamp - interval '%s';" % (chat_id, interval))
        dict_res = query.dictresult()
        if len(dict_res) == 0:
            return False
        else:
            return dict_res

    def getTodayTheses(self, chat_id):
        query = self.__db.query("SELECT * FROM theses WHERE chat_id = %s AND creation_time > current_date;" % chat_id)
        dict_res = query.dictresult()
        if len(dict_res) == 0:
            return False
        else:
            return dict_res

    def insertUser(self, user_id, username, first_name, last_name):
        # ts = time.time()
        row = {"user_id":user_id}
        if username:
            row["username"] = username
        if first_name:
            row["first_name"] = first_name
        if last_name:
            row["last_name"] = last_name
        self.__db.insert('users', row=row)
        self.__db.commit()

    def getUserById(self, user_id):
        query = self.__db.query("SELECT * FROM users WHERE user_id = %d;" % user_id)
        dict_res = query.dictresult()
        if len(dict_res) == 0:
            return False
        else:
            return dict_res[0]

    def insertBotMessage(self, chat_id, message_id, owner_id):
        row = {"chat_id": chat_id, "message_id": message_id, "owner_id": owner_id}
        self.__db.insert('bot_messages', row=row)
        self.__db.commit()

    def getBotMessage(self, chat_id, message_id):
        query = self.__db.query("SELECT * FROM bot_messages WHERE chat_id = %d AND message_id = %d;" % (chat_id, message_id))
        dict_res = query.dictresult()
        if len(dict_res) == 0:
            return False
        else:
            return dict_res[0]

    def close(self):
        self.__db.close()
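
A minimal, hedged sketch of driving BotDB; the postgres:// URL, ids, and thesis body are hypothetical, and the theses/users tables are assumed to already exist.

# Hypothetical values throughout; assumes the theses/users tables exist.
botdb = BotDB('postgres://user:secret@localhost:5432/botdb')
botdb.insertUser(42, 'alice', 'Alice', None)
botdb.insertThesis(init_id=1, chat_id=100, user_id=42, body='example thesis')
thesis = botdb.getThesisByIds(1, 100)  # row dict, or False if nothing matched
botdb.close()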
Exemplo n.º 46
class SteadyPgConnection:
	"""Class representing steady connections to a PostgreSQL database.

	Underlying the connection is a classic PyGreSQL pg API database
	connection which is reset if the connection is lost or used too often.
	Thus the resulting connection is steadier ("tough and self-healing").

	If you want the connection to be persistent in a threaded environment,
	then you should not deal with this class directly, but use either the
	PooledPg module or the PersistentPg module to get the connections.

	"""

	def __init__(self, maxusage=0, setsession=None, *args, **kwargs):
		"""Create a "tough" PostgreSQL connection.

		maxusage: maximum usage limit for the underlying PygreSQL connection
			(number of uses, 0 or False means unlimited usage)
			When this limit is reached, the connection is automatically reset.
		setsession: optional list of SQL commands that may serve to prepare
			the session, e.g. ["set datestyle to ...", "set time zone ..."]
		args, kwargs: the parameters that shall be used to establish
			the PostgreSQL connections with PyGreSQL using pg.DB()

		"""
		self._maxusage = maxusage
		self._setsession_sql = setsession
		self._closeable = 1
		self._usage = 0
		self._con = PgConnection(*args, **kwargs)
		self._setsession()

	def _setsession(self):
		"""Execute the SQL commands for session preparation."""
		if self._setsession_sql:
			for sql in self._setsession_sql:
				self._con.query(sql)

	def _close(self):
		"""Close the tough connection.

		You can always close a tough connection with this method
		and it will not complain if you close it more than once.

		"""
		try:
			self._con.close()
			self._usage = 0
		except:
			pass

	def close(self):
		"""Close the tough connection.

		You are allowed to close a tough connection by default
		and it will not complain if you close it more than once.

		You can disallow closing connections by setting
		the _closeable attribute to 0 or False. In this case,
		closing a connection will be silently ignored.

		"""
		if self._closeable:
			self._close()

	def reopen(self):
		"""Reopen the tough connection.

		It will not complain if the connection cannot be reopened."""
		try:
			self._con.reopen()
			self._setsession()
			self._usage = 0
		except:
			pass

	def reset(self):
		"""Reset the tough connection.

		If a reset is not possible, tries to reopen the connection.
		It will not complain if the connection is already closed.

		"""
		try:
			self._con.reset()
			self._setsession()
			self._usage = 0
		except:
			self.reopen()

	def _get_tough_method(self, method):
		"""Return a "tough" version of a connection class method.

		The tough version checks whether the connection is bad (lost)
		and automatically and transparently tries to reset the connection
		if this is the case (for instance, the database has been restarted).

		"""
		def tough_method(*args, **kwargs):
			try: # check whether connection status is bad
				if not self._con.db.status:
					raise AttributeError
				if self._maxusage: # or connection used too often
					if self._usage >= self._maxusage:
						raise AttributeError
			except:
				self.reset() # then reset the connection
			try:
				r = method(*args, **kwargs) # try connection method
			except: # error in query
				if self._con.db.status: # if it was not a connection problem
					raise # then propagate the error
				else: # otherwise
					self.reset() # reset the connection
					r = method(*args, **kwargs) # and try one more time
			self._usage += 1
			return r
		return tough_method

	def __getattr__(self, name):
		"""Inherit the members of the standard connection class.

		Some methods are made "tougher" than in the standard version.

		"""
		attr = getattr(self._con, name)
		if name in ('query', 'get', 'insert', 'update', 'delete') \
			or name.startswith('get_'):
			attr = self._get_tough_method(attr)
		return attr
Exemplo n.º 48
#!/usr/bin/python

from pg import DB

db = DB(dbname="interactive_brokers",
        host="localhost",
        port=5432,
        user="******",
        passwd="profit")
tables = ("benchmark", "fundamental_ratios", "histogram", "tick",
          "tick_history")
for table in tables:
    deletequery = "DELETE FROM %s" % table
    resetquery = "ALTER SEQUENCE %s_index_seq RESTART WITH 1" % table
    db.query(deletequery)
    try:
        db.query(resetquery)
    except:
        continue
Exemplo n.º 49
from pg import DB
import random
import configparser

config = configparser.ConfigParser()
config.read('config.ini')

db = DB(host=config['mysqlDB']['hostname'], user=config['mysqlDB']['username'], passwd=config['mysqlDB']['password'], dbname=config['mysqlDB']['database']) #connect to DB with given credentials and information

db.query("DROP TABLE users;")
print "Dropped"

db.query("CREATE TABLE users( id SERIAL PRIMARY KEY, name VARCHAR(100) NOT NULL, caller_id VARCHAR(20) NOT NULL);")  
print "Created"
Exemplo n.º 50
'''
Created on 1 June 2016

@author: Mikhail Bulygin <*****@*****.**>
'''
import xml.etree.ElementTree as ET
from pg import DB


if __name__ == '__main__':
    file = open("ruwiki.xml", encoding="utf-8")
    tree = ET.iterparse(file)
    
    db = DB(host="localhost", user="******", passwd="1234", dbname="wiki")
    db.query("TRUNCATE TABLE wiki")
    
    for i, line in enumerate(tree):
        event, element = line
        if element.tag == "page":
            pageid = element.find("id").text
            title = element.find("title").text
            timestamp = element.find("revision").find("timestamp").text.replace("T", " ")
            username = element.find("revision").find("contributor").find("username")
            if username is not None:
                author = username.text
            else:
                author = element.find("revision").find("contributor").find("ip").text
            text = element.find("revision").find("text").text
            
            db.insert("wiki", id=pageid, title=title, timestamp=timestamp, author=author, text=text)
            
Exemplo n.º 51
def main():
    db = DB(dbname='databasename', host='127.0.0.1', port=5432, user='******', passwd='...')
    for tablename in ('table1', 'table2'):
        q = db.query('select * from %s where ...' % tablename)
        d = q.dictresult()
        json.dump(d, open("%s.txt" % tablename,'w'))
Exemplo n.º 52
# Database name : testdatabase
# Database server: localhost (my computer)
# Database port: 5432 (default portnumber)
# Username : postgres
# Password : admin

from pg import DB

db = DB(dbname='testdatabase', host='localhost', port=5432, user='******', passwd='admin')

print("Connection to the database: OK")

print("Clear database...")


db.query("drop TABLE IF EXISTS location")
db.query("drop TABLE IF EXISTS cityfeatures")
db.query("drop TABLE IF EXISTS feature")
db.query("drop TABLE IF EXISTS city")


print("Now creating database objects :")



print("Table - Feature")

db.query(" CREATE TABLE feature ( "
         "   featureId smallint  NOT NULL,"
         "   featureName character varying(50)  NOT NULL,"
         "   CONSTRAINT Feature_pk PRIMARY KEY (featureId))"
Exemplo n.º 53
# db.query("ALTER TABLE "+config.get('summary_stats', 'shp_table_name')+" ADD column attributes TEXT")
#
# print "importing raster"
# db.query("DROP TABLE IF EXISTS "+config.get('summary_stats', 'raster_table_name'))
# p1 = subprocess.Popen(['/usr/bin/raster2pgsql', '-d', '-s', config.get('summary_stats', 'raster_file_srs'), '-t', '100x100',  config.get('summary_stats', 'raster_file'),
#                        config.get('summary_stats', 'raster_table_name')], stdout=subprocess.PIPE)
# p2 = subprocess.Popen(['/usr/bin/psql', '-p', '5432', config.get('database', 'dbname'), '-U', config.get('database', 'user'), '-h', '127.0.0.1'],
#                       stdin=p1.stdout, stdout=subprocess.PIPE, env=psql_env)
# p1.stdout.close()  # Allow p1 to receive a SIGPIPE if p2 exits.
# output,err = p2.communicate()
#
# db.query('CREATE INDEX dem_st_convexhull_idx ON '+config.get('summary_stats', 'raster_table_name')+' '
#                                'USING gist ((st_convexhull(rast)) public.gist_geometry_ops_2d)')

print("updating attributes")
q = db.query("SELECT count(*) as count_all from " +
             config.get('summary_stats', 'shp_table_name'))
count_all = q.dictresult()[0]["count_all"]
steps = count_all // 500
for s in range(0, steps):
    try:
        db.begin()
        db.query('select raster.update_attributes($1, $2, $3, $4)',
                 (config.get('summary_stats', 'raster_table_name'),
                  config.get('summary_stats', 'shp_table_name'), 500, s * 500))
        db.commit()
        print("processed " + str((s * 500)))
    except Exception as e:
        print(str(e))
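
Note that floor division drops the final partial batch whenever count_all is not
a multiple of 500; a hedged one-line fix is ceiling division for the step count:

steps = -(-count_all // 500)  # ceil(count_all / 500), so the last partial batch runs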
Exemplo n.º 54
0
"""
@author: Shih-Chi
"""

from pg import DB
import numpy as np
import datetime

db = DB(dbname='Pingtung', host='localhost', port=5432,
    user='******', passwd='husky')

import os
for filename in os.listdir('Pingtung/Well'):
    site = filename[0:8]
    year = filename[9:13]
    print(filename)
    sites = db.query('select site, starttime, endtime from "Well_Stations" where site like \'{site}\''.format(site=site)).getresult()
    if sites != []:
        for line in open('Pingtung/Well/{FN}'.format(FN=filename), 'r'):
            # check that the timestamp matches the filename
            data = line.split()
            if len(data[1]) == 8:
                if (data[0][0:4] != year) and (data[0][5:10] != '01-01'):
                    print('file error: date mismatch')
                    break
            else:
                print('file error: bad time format')
                break
                
            db.insert('Well', site=site, date='{date} {time}'.format(date=data[0], time=data[1]), value=data[2])
    else:
        print('station not found')
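
A stricter validation sketch for the width checks above, reusing the datetime
import already in the example (the helper is hypothetical, not in the original):

def valid_timestamp(date_s, time_s):
    # parse the full "YYYY-MM-DD HH:MM:SS" stamp instead of checking field widths
    try:
        datetime.datetime.strptime(date_s + ' ' + time_s, '%Y-%m-%d %H:%M:%S')
        return True
    except ValueError:
        return False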
Exemplo n.º 55
0
class vol_utils(object):
    def __init__(self):
        self.logger = logging.getLogger('volume_project.sql_utilities')
        self.db_connect()

    def db_connect(self):
        CONFIG = configparser.ConfigParser()
        CONFIG.read('db.cfg')
        dbset = CONFIG['DBSETTINGS']
        self.db = DB(dbname=dbset['database'],
                     host=dbset['host'],
                     user=dbset['user'],
                     passwd=dbset['password'])
        self.logger.info('Database connected.')

    def exec_file(self, filename):
        f = None
        try:
            f = open(filename)
        except OSError:
            for root_f, folders, files in os.walk('.'):
                if filename in files:
                    f = open(root_f + '/' + filename)
                    break

        if f is None:
            self.logger.error('File %s not found!', filename)
            raise Exception('File %s not found!' % filename)

        self.logger.info('Running %s', filename)
        exec(f.read())  # execute the file's contents, not its name

    def execute_sql(self, filename):
        f = None
        try:
            f = open(filename)
        except OSError:
            for root_f, folders, files in os.walk('.'):
                if filename in files:
                    f = open(root_f + '/' + filename)
                    break
        if f is None:
            self.logger.error('File %s not found!', filename)
            raise Exception('File not found!')

        sql = f.read()
        reconnect = 0
        while True:
            try:
                self.db.query(sql)
                self.db.commit()
                return
            except ProgrammingError as pe:
                self.logger.error('Error in SQL', exc_info=True)
                self.db_connect()
                reconnect += 1
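                # Hedged refinement (not in the original): back off before the
                # next attempt so transient outages can clear, for example
                # time.sleep(2 ** reconnect), which needs "import time" above.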
            if reconnect > 5:
                raise Exception('Check DB connection. Cannot connect')

    def get_sql_results(self,
                        filename,
                        columns,
                        replace_columns=None,
                        parameters=None):
        '''
        Input:
            filename
            columns: a list of column names
            replace_columns: a dictionary of {placeholders:real strings}
            parameters: list of parameter values
        Output:
            dataframe of results
        '''

        f = None
        try:
            f = open(filename)
        except OSError:
            for root_f, folders, files in os.walk('.'):
                if filename in files:
                    f = open(root_f + '/' + filename)
                    break

        if f is None:
            if filename[:6] == 'SELECT':  # also accepts SQL queries passed directly as a string
                sql = filename
            else:
                self.logger.error('File %s not found!', filename)
                raise Exception('File not found!')
        else:
            sql = f.read()

        if replace_columns is not None:
            for key, value in replace_columns.items():
                sql = sql.replace(key, str(value))

        reconnect = 0
        while True:
            try:
                if parameters is not None:
                    return pd.DataFrame(self.db.query(sql,
                                                      parameters).getresult(),
                                        columns=columns)
                else:
                    return pd.DataFrame(self.db.query(sql).getresult(),
                                        columns=columns)
            except ProgrammingError as pe:
                self.logger.error('Error in SQL', exc_info=True)
                self.db_connect()
                reconnect += 1
            if reconnect > 5:
                raise Exception('Check Error Message')

    def load_pkl(self, filename):
        f = None
        try:
            f = open(filename, "rb")
        except OSError:
            for root_f, folders, files in os.walk('.'):
                if filename in files:
                    f = open(root_f + '/' + filename, "rb")  # binary mode, as pickle requires
                    break
        if f is None:
            self.logger.error('File %s not found!', filename)
            raise Exception('File not found!')

        return pickle.load(f)

    def truncatetable(self, tablename):
        reconnect = 0
        while True:
            try:
                self.db.truncate(tablename)
                self.db.commit()
                self.logger.info('%s truncated', tablename)
                return
            except ProgrammingError as pe:
                print(pe)
                self.db_connect()
                reconnect += 1
            if reconnect > 5:
                self.logger.error('Error in SQL', exc_info=True)
                raise Exception('Check Error Message')

    def inserttable(self, tablename, content):
        reconnect = 0
        while True:
            try:
                self.db.inserttable(tablename, content)
                self.db.commit()
                self.logger.info('Inserted table: %s', tablename)
                break
            except ProgrammingError:
                self.db_connect()
                reconnect += 1
            if reconnect > 5:
                self.logger.error('Error in SQL', exc_info=True)
                raise Exception('Check Error Message')

    def __exit__(self, exc_type, exc_value, traceback):
        self.db.close()
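
A minimal usage sketch for the class above (the file name, column list,
placeholder, and parameter value are all hypothetical):

utils = vol_utils()  # assumes db.cfg with a [DBSETTINGS] section is present
df = utils.get_sql_results('daily_volumes.sql',
                           columns=['centreline_id', 'volume'],
                           replace_columns={'$SCHEMA$': 'prj_volume'},
                           parameters=['2016-09-01'])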
Exemplo n.º 56
0
from pg import DB
PG = DB(dbname='VM', host='192.168.1.112', port=5432, user='******', passwd='123456')
sql = "select A.sample_time, A.stat_name, B.stat_rollup, B.unit, A.stat_group, A.entity, A.stat_value from hist_stat_daily A, stat_counters B where A.stat_id = B.id"
q = PG.query(sql)
rows=q.getresult()
for row in rows:
    PG.insert("vm_state",time=row[0],stat_name=row[1],stat_rollup_type=row[2],unit=row[3],stat_group=row[4],entity=row[5],stat_value=row[6])
print("成功")
PG.close()
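
The loop above issues one INSERT round trip per row. A hedged alternative is
PyGreSQL's inserttable, which loads the whole result set in a single call
(assuming vm_state declares its columns in the same order as the SELECT list):

PG.inserttable("vm_state", rows)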
Exemplo n.º 57
0
class pgcon(object):
    def __init__(self):
        self.pgsql = {}
        self.result = None
        self.status = "Unknown"
        self.db = None

    def query_consul(self):
        '''
        First try to get the Consul coordinates from the environment (set by
        Docker through container linking), then fetch the PostgreSQL
        container's coordinates from Consul's key-value store.
        '''

        try:
            g.consul_server = os.environ['CONSUL_PORT_8500_TCP_ADDR']
            g.consul_port = os.environ['CONSUL_PORT_8500_TCP_PORT']
        except:
            if app.debug:
                g.consul_server = '172.17.0.2'
                g.consul_port = '8500'
            else:
                raise EnvironmentError(
                    'No consul environment variables available')

        try:
            c = consul.Consul(host=g.consul_server, port=g.consul_port)
            cresponse = c.kv.get('postgresql', recurse=True)[1]
        except:
            raise LookupError('Error in connecting to the Consul server')

        try:
            for d in cresponse:
                v = d['Value'].decode()  # python-consul returns values as bytes
                k = d['Key'].split('/')[-1]
                self.pgsql[k] = v
        except:
            raise AttributeError('Something is wrong with Consuls response')

    def connect(self):
        if not self.pgsql:
            raise ValueError('No coordinates to connect to the db.')
        try:
            self.db = DB(dbname=self.pgsql['user'],
                         host=self.pgsql['host'],
                         port=int(self.pgsql['port']),
                         user=self.pgsql['user'],
                         passwd=self.pgsql['password'])
        except:
            raise IOError('Could not connect to the db.')
        self.status = "Connected"

    def query(self, p=None):
        '''
        Connect to the db if necessary, then run the query p and store
        the result.
        '''

        if not self.db:
            self.connect()

        if not self.db:
            self.result = {'postgresql db: ': 'not connected'}
            return

        if p:
            self.result = self.db.query(p)

    def call(self, m):
        if not self.db:
            self.connect()
        self.result = getattr(self.db, m)()

    def disconnect(self):
        try:
            self.db.close()
        except Exception:
            pass
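
A minimal usage sketch for the class above, assuming the Consul environment
variables and the postgresql/ key-value tree described in query_consul are in
place:

pc = pgcon()
pc.query_consul()
pc.query("SELECT version()")  # connects on first use
print(pc.result)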