Exemplo n.º 1
0
class vol_utils(object):
    def __init__(self):
        self.logger = logging.getLogger('volume_project.sql_utilities')
        self.db_connect()

    def db_connect(self):
        CONFIG = configparser.ConfigParser()
        CONFIG.read('db.cfg')
        dbset = CONFIG['DBSETTINGS']
        self.db = DB(dbname=dbset['database'],
                     host=dbset['host'],
                     user=dbset['user'],
                     passwd=dbset['password'])
        self.logger.info('Database connected.')

    def exec_file(self, filename):
        try:
            f = open(filename)
            exec(filename)
        except:
            for root_f, folders, files in os.walk('.'):
                if filename in files:
                    f = root_f + '/' + filename
                    break
            self.logger.info('Running ', f)
            exec(f)

        if f is None:
            self.logger.error('File %s not found!', filename)
            raise Exception('File %s not found!', filename)

    def execute_sql(self, filename):
        f = None
        try:
            f = open(filename)
        except:
            for root_f, folders, files in os.walk('.'):
                if filename in files:
                    f = open(root_f + '/' + filename)
        if f is None:
            self.logger.error('File %s not found!', filename)
            raise Exception('File not found!')

        sql = f.read()
        reconnect = 0
        while True:
            try:
                self.db.query(sql)
                self.db.commit()
                return
            except ProgrammingError as pe:
                self.logger.error('Error in SQL', exc_info=True)
                self.db_connect()
                reconnect += 1
            if reconnect > 5:
                raise Exception('Check DB connection. Cannot connect')

    def get_sql_results(self,
                        filename,
                        columns,
                        replace_columns=None,
                        parameters=None):
        '''
        Input:
            filename
            columns: a list of column names
            replace_columns: a dictionary of {placeholders:real strings}
            parameters: list of parameter values
        Output:
            dataframe of results
        '''

        f = None
        try:
            f = open(filename)
        except:
            for root_f, folders, files in os.walk('.'):
                if filename in files:
                    f = open(root_f + '/' + filename)

        if f is None:
            if filename[:
                        6] == 'SELECT':  # Also accepts sql queries directly in string form
                sql = filename
            else:
                self.logger.error('File %s not found!', filename)
                raise Exception('File not found!')
        else:
            sql = f.read()

        if replace_columns is not None:
            for key, value in replace_columns.items():
                sql = sql.replace(key, str(value))

        reconnect = 0
        while True:
            try:
                if parameters is not None:
                    return pd.DataFrame(self.db.query(sql,
                                                      parameters).getresult(),
                                        columns=columns)
                else:
                    return pd.DataFrame(self.db.query(sql).getresult(),
                                        columns=columns)
            except ProgrammingError as pe:
                self.logger.error('Error in SQL', exc_info=True)
                self.db_connect()
                reconnect += 1
            if reconnect > 5:
                raise Exception('Check Error Message')

    def load_pkl(self, filename):
        f = None
        try:
            f = open(filename, "rb")
        except:
            for root_f, folders, files in os.walk('.'):
                if filename in files:
                    f = open(root_f + '/' + filename)
        if f is None:
            self.logger.error('File %s not found!', filename)
            raise Exception('File not found!')

        return pickle.load(f)

    def truncatetable(self, tablename):
        reconnect = 0
        while True:
            try:
                self.db.truncate(tablename)
                self.db.commit()
                self.logger.info('%s truncated', tablename)
                return
            except ProgrammingError as pe:
                print(pe)
                self.db_connect()
                reconnect += 1
            if reconnect > 5:
                self.logger.error('Error in SQL', exc_info=True)
                raise Exception('Check Error Message')

    def inserttable(self, tablename, content):
        reconnect = 0
        while True:
            try:
                self.db.inserttable(tablename, content)
                self.db.commit()
                self.logger.info('Inserted table: %s', tablename)
                break
            except ProgrammingError:
                self.db_connect()
                reconnect += 1
            if reconnect > 5:
                self.logger.error('Error in SQL', exc_info=True)
                raise Exception('Check Error Message')

    def __exit__(self):
        self.db.close()
Exemplo n.º 2
0
class PostGreDBConnector:
    """PostGreDBConnector opens a PostGre DB connection. Different functions allow you to add, delete or update
    documents in PostGre DB."""

    def __init__(self):
        """Connecting to localhost (default host and port [localhost, 4532]) PostGre DB and initializing needed data
            base and tables."""
        try:
            print("Connecting to PostGre DB...")
            self.__db = DB(dbname='testdb', host='localhost', port=5432, user='******', passwd='superuser')
            print("PostGre DB connection successfully built.")
        except ConnectionError:
            print("PostGre DB connection could not be built.")

        self.delete_all_data()
        self.drop_all_tables()

    def close_connection(self):
        self.__db.close()

    def create_schema(self, schema_name):
        self.__db.query("CREATE SCHEMA " + schema_name)
        self.__create_tables(schema_name)
        self.__create_functions(schema_name)

    def __create_tables(self, schema):
        """Create needed tables for RDF parsing."""
        schema += "."
        self._add_table("CREATE TABLE " + schema + "texts (id serial primary key, title text)")
        self._add_table(
            "CREATE TABLE " + schema + "bscale (id serial primary key, bscale text, nominal bool, ordinal bool, interval bool)")
        self._add_table("CREATE TABLE " + schema + "bsort (id serial primary key, bsort text)")
        self._add_table("CREATE TABLE " + schema + "pattern (id serial primary key, pattern text)")
        self._add_table("CREATE TABLE " + schema + "single_pattern (id serial primary key, single_pattern text)")
        self._add_table("CREATE TABLE " + schema + "snippets (id serial primary key, snippet text)")

        # relations
        self._add_table("CREATE TABLE " + schema + "has_attribute (bsort_id int, bscale_id integer[], aggregation int)")
        self._add_table("CREATE TABLE " + schema + "has_object (bscale_id int, pattern_id integer[], aggregation int)")
        self._add_table(
            "CREATE TABLE " + schema + "pattern_single_pattern (pattern_id int, single_pattern_id integer[], aggregation int)")
        self._add_table("CREATE TABLE " + schema + "texts_snippets (text_id int primary key, snippet_id integer[], aggregation int)")
        self._add_table(
            "CREATE TABLE " + schema + "snippet_offsets (id serial primary key,"
            " single_pattern_id int, snippet_id int, offsets integer[][], aggregation int)")

        # adjective and verb extractions
        self._add_table("CREATE TABLE " + schema + "subject_occ (id serial primary key, subject text, count int)")
        self._add_table("CREATE TABLE " + schema + "adjective_occ (id serial primary key, adjective text, count int)")
        self._add_table("CREATE TABLE " + schema + "verb_occ (id serial primary key, verb text, count int)")
        self._add_table("CREATE TABLE " + schema + "object_occ (id serial primary key, object text, count int)")
        self._add_table("CREATE TABLE " + schema + "subject_adjective_occ (id serial primary key, subject int, adjective int, count int, pmi float)")
        self._add_table("CREATE TABLE " + schema + "subject_object_occ (id serial primary key, subject int, object int, count int, pmi float)")
        self._add_table("CREATE TABLE " + schema + "object_verb_occ (id serial primary key, object int, verb int, count int, pmi float)")
        self._add_table("CREATE TABLE " + schema + "subject_verb_occ (id serial primary key, subject int, verb int, count int, pmi float)")

        # correlating pattern
        self._add_table("CREATE TABLE " + schema + "bscale_single_pattern (id serial primary key, bscale_id int, single_pattern_id int, single_pattern text, count int)")
        self._add_table(
            "CREATE TABLE " + schema + "correlating_pattern (id serial primary key, pattern_a int, pattern_b int, count int, pmi float)")

    def __create_functions(self, schema):
        """Create all necessary functions to aggregate the results saved in the database."""
        schema += "."
        self.add_function(schema + "aggregate_texts_snippets", "SELECT text_id, array_length(snippet_id, 1) FROM " + schema + "texts_snippets")
        self.add_function(schema + "aggregate_snippet_offsets", "SELECT id, array_length(offsets, 1) FROM " + schema + "snippet_offsets")

    def add_function(self, name, function):
        """Create a new function in the db."""
        create_function = "CREATE FUNCTION "
        returns = "() RETURNS SETOF RECORD AS "
        lang = " LANGUAGE SQL"
        query = create_function + name + returns + add_quotes(function) + lang
        self.__db.query(query)

    def _add_table(self, query):
        """Create a new table with a query."""
        self.__db.query(query)

    def add_table(self, schema, name, rows):
        """Create a new table with a name and rows given in query form."""
        create_table = "CREATE TABLE "
        query = create_table + schema + "." + name + rows
        self.__db.query(query)

    def insert(self, schema, table, row):
        """Insert a new row element into a specified table."""
        return self.__db.insert(schema + "." + table, row)

    def is_in_table(self, schema, table, where_clause):
        """Returns whether a row already exists in a table or not."""
        select = "SELECT * FROM "
        where = " WHERE "
        q = select + schema + "." + table + where + where_clause
        result = self.__db.query(q).dictresult()
        if len(result) > 0:
            return True
        else:
            return False

    def update(self, schema, table, values, where_clause):
        """Update an entry in a specified table."""
        UPDATE = "UPDATE "
        SET = " SET "
        WHERE = " WHERE "
        query = UPDATE + schema + "." + table + SET + values + WHERE + where_clause
        self.query(query)

    def get(self, schema, table, where_clause, key):
        """Return the key of a specific item in a table."""
        select = "SELECT "
        _from = " FROM "
        where = " WHERE "
        q = select + key + _from + schema + "." + table + where + where_clause
        result = self.__db.query(q).dictresult()
        if len(result) > 0:
            return result[0][key]
        else:
            return None

    def get_data_from_table(self, schema, table):
        """Gets all data available in a specific table."""
        return self.__db.query("SELECT * FROM " + schema + "." + table).dictresult()

    def get_id(self, schema, table, where_clause):
        """Return the id of an item in a table. If found return id number of found item, else None."""
        select = "SELECT id FROM "
        where = " WHERE "
        q = select + schema + "." + table + where + where_clause
        result = self.__db.query(q).dictresult()
        if len(result) > 0:
            return result[0]['id']
        else:
            return None

    def delete_from_table(self, schema, table, row):
        """Delete a row element form a specific table."""
        return self.__db.delete(schema + "." + table, row)

    def delete_data_in_table(self, schema, table):
        """Delete all data in a specific table."""
        self.__db.truncate(schema + "." + table, restart=True, cascade=True, only=False)

    def delete_all_data(self):
        """Deletes all data from all existing tables."""
        tables = self.get_tables()
        for table in tables:
            table_name = str(table)
            self.__db.truncate(table_name, restart=True, cascade=True, only=False)

    def get_tables(self):
        """Get all available tables in the database."""
        return self.__db.get_tables()

    def get_attributes(self, schema, table):
        """Get all attributes of a specified table."""
        return self.__db.get_attnames(schema + "." + table)

    def drop_table(self, schema, table):
        """Drops a specified table."""
        query = "DROP TABLE "
        self.__db.query(query + schema + "." + table)

    def drop_all_tables(self):
        """Drops all existing tables."""
        tables = self.get_tables()
        table_names = ""
        if len(tables) > 0 :
            for ind, table in enumerate(tables):
                if ind == 0:
                    table_names = str(table)
                else:
                    table_names = table_names + ", " + str(table)
            self.__db.query("DROP TABLE " + table_names)
        else:
            print("Nothing to delete.")

    def get_all(self, schema, table, attribute):
        """Gets one or more attributes of all entries from a specified table."""
        select = "SELECT "
        _from = " FROM "
        query = select + attribute + _from + schema + "." + table
        return self.__db.query(query).dictresult()

    def query(self, query):
        """Sends a query to the database."""
        result = self.__db.query(query)
        if result is not None:
            if not isinstance(result, str):
                return result.dictresult()
        else:
            return result
             tree.findall(".//EndTime"), tree.findall(".//WorkPeriod"),
             tree.findall(".//Expired"), tree.findall(".//Signing"),
             tree.findall(".//Notification"), tree.findall(".//WorkEventType"),
             tree.findall(".//Contractor"), tree.findall(".//PermitType"),
             tree.findall(".//Description")):
    rowsql = []
    for x in a:
        if x.tag in ('LastUpdated', 'StartTime',
                     'EndTime') and x.text is not None:
            y = time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(float(x.text) / 1000))
        else:
            y = x.text
        if x.tag == 'Name':
            rowsql.append(None)
            rowsql.append(None)
            rowsql.append(None)
        rowsql.append(y)
    rowsql.append(None)
    data.append(rowsql)

db.truncate('city.restrictions_import')
db.inserttable('city.restrictions_import', data)

sql = db.query(
    "DELETE FROM city.restrictions USING city.restrictions_import WHERE city.restrictions.id = city.restrictions_import.id"
)
sql = db.query(
    "INSERT INTO city.restrictions SELECT * FROM city.restrictions_import")
db.close()
Exemplo n.º 4
0
            chain.append(current)
            root.remove(current)
            to_visit.extend(list(pairs.groupby('c1').get_group(current)['c2']))
            visited.append(current)

    chains.append(chain)

groups = {}
count = 1
table = []
for group in chains:
    for tcl in group:
        table.append([tcl, count])
    count = count + 1

db.truncate('prj_volume.centreline_groups_l2')
db.inserttable('prj_volume.centreline_groups_l2', table)

group_no_merge = [
    x for t in db.query(
        'SELECT DISTINCT group_number FROM prj_volume.centreline_groups LEFT JOIN prj_volume.centreline_groups_l2 ON (group_number=l1_group_number) WHERE l2_group_number IS NULL'
    ).getresult() for x in t
]

for tcl in group_no_merge:
    table.append([tcl, count])
    count = count + 1

db.truncate('prj_volume.centreline_groups_l2')
db.inserttable('prj_volume.centreline_groups_l2', table)
db.close()
Exemplo n.º 5
0
events['group_id'] = group_id
grouped = events.groupby(['group_id'])

event_table = []
for (id), group in grouped:
    row = []
    row.append(id)
    row.append(group['event_name'].iloc[0])
    row.append(group['venue_id'].iloc[0])
    row.append(group['classification'].iloc[0])
    row.append(len(group))
    event_table.append(row)

del events['venue_id']
del events['classification']
del events['event_name']
events = events[[
    'group_id', 'event_id', 'start_date', 'start_time', 'end_date', 'end_time',
    'source'
]]
events['start_date'] = events.apply(caststartdate, axis=1)
events['end_date'] = events.apply(castenddate, axis=1)
events['start_time'] = events.apply(caststarttime, axis=1)
events['end_time'] = events.apply(castendtime, axis=1)
events = events.values.tolist()
db.truncate('city.event_groups')
db.truncate('city.event_details')
db.inserttable('city.event_groups', event_table)
db.inserttable('city.event_details', events)

db.close()