class vol_utils(object):
    """Utility wrapper around a PostgreSQL connection (pg.DB) for the
    volume project.

    Provides helpers to run SQL files, fetch query results as pandas
    DataFrames, and truncate/bulk-load tables, automatically reconnecting
    and retrying when the database raises a ProgrammingError.
    """

    # Number of reconnect attempts before giving up on a query.
    _MAX_RECONNECTS = 5

    def __init__(self):
        self.logger = logging.getLogger('volume_project.sql_utilities')
        self.db_connect()

    def db_connect(self):
        """(Re)open the database connection using settings from db.cfg."""
        CONFIG = configparser.ConfigParser()
        CONFIG.read('db.cfg')
        dbset = CONFIG['DBSETTINGS']
        self.db = DB(dbname=dbset['database'], host=dbset['host'],
                     user=dbset['user'], passwd=dbset['password'])
        self.logger.info('Database connected.')

    def _find_file(self, filename, mode='r'):
        """Open *filename* directly, or search the working tree for it.

        Returns an open file object, or None if the file cannot be found.
        The caller is responsible for closing the returned file.
        """
        try:
            return open(filename, mode)
        except OSError:
            for root_f, folders, files in os.walk('.'):
                if filename in files:
                    return open(os.path.join(root_f, filename), mode)
        return None

    def exec_file(self, filename):
        """Execute the Python source stored in *filename*.

        The file is looked up directly first, then anywhere under the
        current directory.  Raises Exception if it cannot be found.

        Fix: the original called exec() on the filename string (and, in
        the fallback branch, on the path string) instead of the file's
        contents, and leaked the file handle.
        """
        f = self._find_file(filename)
        if f is None:
            self.logger.error('File %s not found!', filename)
            raise Exception('File {} not found!'.format(filename))
        with f:
            self.logger.info('Running %s', f.name)
            # NOTE(review): exec of an arbitrary file is dangerous; only
            # run trusted project scripts through this method.
            exec(f.read())

    def execute_sql(self, filename):
        """Run the SQL statements stored in *filename*.

        The file is located like in exec_file.  On ProgrammingError the
        connection is rebuilt and the query retried, up to
        _MAX_RECONNECTS times before the error is surfaced.
        """
        f = self._find_file(filename)
        if f is None:
            self.logger.error('File %s not found!', filename)
            raise Exception('File not found!')
        with f:
            sql = f.read()
        reconnect = 0
        while True:
            try:
                self.db.query(sql)
                self.db.commit()
                return
            except ProgrammingError:
                self.logger.error('Error in SQL', exc_info=True)
                self.db_connect()
                reconnect += 1
                if reconnect > self._MAX_RECONNECTS:
                    raise Exception('Check DB connection. Cannot connect')

    def get_sql_results(self, filename, columns, replace_columns=None,
                        parameters=None):
        '''
        Input:
            filename: path to a SQL file, or a raw query string that
                starts with "SELECT"
            columns: a list of column names for the returned DataFrame
            replace_columns: a dictionary of {placeholders: real strings}
                substituted textually into the SQL before execution
            parameters: list of parameter values passed to db.query
        Output:
            dataframe of results
        '''
        f = self._find_file(filename)
        if f is None:
            if filename[:6] == 'SELECT':
                # Also accepts sql queries directly in string form
                sql = filename
            else:
                self.logger.error('File %s not found!', filename)
                raise Exception('File not found!')
        else:
            with f:
                sql = f.read()
        if replace_columns is not None:
            # NOTE(review): plain textual substitution -- never feed
            # untrusted values through replace_columns (SQL injection).
            for key, value in replace_columns.items():
                sql = sql.replace(key, str(value))
        reconnect = 0
        while True:
            try:
                if parameters is not None:
                    result = self.db.query(sql, parameters)
                else:
                    result = self.db.query(sql)
                return pd.DataFrame(result.getresult(), columns=columns)
            except ProgrammingError:
                self.logger.error('Error in SQL', exc_info=True)
                self.db_connect()
                reconnect += 1
                if reconnect > self._MAX_RECONNECTS:
                    raise Exception('Check Error Message')

    def load_pkl(self, filename):
        """Unpickle and return the object stored in *filename*.

        Fix: the fallback search now opens the file in binary mode as
        pickle requires (the original fallback reopened it in text mode),
        and the handle is closed after loading.
        """
        f = self._find_file(filename, mode='rb')
        if f is None:
            self.logger.error('File %s not found!', filename)
            raise Exception('File not found!')
        with f:
            # NOTE(review): pickle.load on untrusted files can execute
            # arbitrary code -- only load project-produced pickles.
            return pickle.load(f)

    def truncatetable(self, tablename):
        """TRUNCATE *tablename*, retrying with a fresh connection on error."""
        reconnect = 0
        while True:
            try:
                self.db.truncate(tablename)
                self.db.commit()
                self.logger.info('%s truncated', tablename)
                return
            except ProgrammingError:
                # Fix: log through the class logger instead of print().
                self.logger.error('Error in SQL', exc_info=True)
                self.db_connect()
                reconnect += 1
                if reconnect > self._MAX_RECONNECTS:
                    raise Exception('Check Error Message')

    def inserttable(self, tablename, content):
        """Bulk-insert *content* (a sequence of rows) into *tablename*."""
        reconnect = 0
        while True:
            try:
                self.db.inserttable(tablename, content)
                self.db.commit()
                self.logger.info('Inserted table: %s', tablename)
                break
            except ProgrammingError:
                self.db_connect()
                reconnect += 1
                if reconnect > self._MAX_RECONNECTS:
                    self.logger.error('Error in SQL', exc_info=True)
                    raise Exception('Check Error Message')

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        """Close the DB connection.

        Fix: accepts the standard context-manager arguments; the defaults
        keep any existing no-argument callers working.
        """
        self.db.close()
class PostGreDBConnector:
    """PostGreDBConnector opens a PostGre DB connection.

    Different functions allow you to add, delete or update documents in
    PostGre DB.

    NOTE(review): all queries are assembled by string concatenation, so
    every schema/table/clause argument must come from trusted code, never
    from user input (SQL injection risk).
    """

    def __init__(self):
        """Connecting to localhost (default host and port [localhost, 4532])
        PostGre DB and initializing needed data base and tables."""
        try:
            print("Connecting to PostGre DB...")
            # NOTE(review): hard-coded credentials -- move to configuration.
            self.__db = DB(dbname='testdb', host='localhost', port=5432,
                           user='******', passwd='superuser')
            print("PostGre DB connection successfully built.")
        except ConnectionError:
            print("PostGre DB connection could not be built.")
            # Fix: bail out -- the original fell through and called
            # methods on a connection that was never created, raising
            # AttributeError right after reporting the failure.
            return
        self.delete_all_data()
        self.drop_all_tables()

    def close_connection(self):
        """Close the underlying DB connection."""
        self.__db.close()

    def create_schema(self, schema_name):
        """Create a schema plus all project tables and functions in it."""
        self.__db.query("CREATE SCHEMA " + schema_name)
        self.__create_tables(schema_name)
        self.__create_functions(schema_name)

    def __create_tables(self, schema):
        """Create needed tables for RDF parsing."""
        schema += "."
        # Issued in this exact order: base entities, then relations, then
        # adjective/verb extraction tables, then correlating pattern tables.
        table_defs = [
            "texts (id serial primary key, title text)",
            "bscale (id serial primary key, bscale text, nominal bool, ordinal bool, interval bool)",
            "bsort (id serial primary key, bsort text)",
            "pattern (id serial primary key, pattern text)",
            "single_pattern (id serial primary key, single_pattern text)",
            "snippets (id serial primary key, snippet text)",
            # relations
            "has_attribute (bsort_id int, bscale_id integer[], aggregation int)",
            "has_object (bscale_id int, pattern_id integer[], aggregation int)",
            "pattern_single_pattern (pattern_id int, single_pattern_id integer[], aggregation int)",
            "texts_snippets (text_id int primary key, snippet_id integer[], aggregation int)",
            "snippet_offsets (id serial primary key,"
            " single_pattern_id int, snippet_id int, offsets integer[][], aggregation int)",
            # adjective and verb extractions
            "subject_occ (id serial primary key, subject text, count int)",
            "adjective_occ (id serial primary key, adjective text, count int)",
            "verb_occ (id serial primary key, verb text, count int)",
            "object_occ (id serial primary key, object text, count int)",
            "subject_adjective_occ (id serial primary key, subject int, adjective int, count int, pmi float)",
            "subject_object_occ (id serial primary key, subject int, object int, count int, pmi float)",
            "object_verb_occ (id serial primary key, object int, verb int, count int, pmi float)",
            "subject_verb_occ (id serial primary key, subject int, verb int, count int, pmi float)",
            # correlating pattern
            "bscale_single_pattern (id serial primary key, bscale_id int, single_pattern_id int, single_pattern text, count int)",
            "correlating_pattern (id serial primary key, pattern_a int, pattern_b int, count int, pmi float)",
        ]
        for definition in table_defs:
            self._add_table("CREATE TABLE " + schema + definition)

    def __create_functions(self, schema):
        """Create all necessary functions to aggregate the results saved in
        the database."""
        schema += "."
        self.add_function(
            schema + "aggregate_texts_snippets",
            "SELECT text_id, array_length(snippet_id, 1) FROM " + schema + "texts_snippets")
        self.add_function(
            schema + "aggregate_snippet_offsets",
            "SELECT id, array_length(offsets, 1) FROM " + schema + "snippet_offsets")

    def add_function(self, name, function):
        """Create a new function in the db."""
        create_function = "CREATE FUNCTION "
        returns = "() RETURNS SETOF RECORD AS "
        lang = " LANGUAGE SQL"
        query = create_function + name + returns + add_quotes(function) + lang
        self.__db.query(query)

    def _add_table(self, query):
        """Create a new table with a query."""
        self.__db.query(query)

    def add_table(self, schema, name, rows):
        """Create a new table with a name and rows given in query form."""
        create_table = "CREATE TABLE "
        query = create_table + schema + "." + name + rows
        self.__db.query(query)

    def insert(self, schema, table, row):
        """Insert a new row element into a specified table."""
        return self.__db.insert(schema + "." + table, row)

    def is_in_table(self, schema, table, where_clause):
        """Returns whether a row already exists in a table or not."""
        q = "SELECT * FROM " + schema + "." + table + " WHERE " + where_clause
        result = self.__db.query(q).dictresult()
        # Fix: return the truth value directly instead of an if/else.
        return len(result) > 0

    def update(self, schema, table, values, where_clause):
        """Update an entry in a specified table."""
        query = ("UPDATE " + schema + "." + table +
                 " SET " + values + " WHERE " + where_clause)
        self.query(query)

    def get(self, schema, table, where_clause, key):
        """Return the key of a specific item in a table.

        Returns None when no row matches the where clause.
        """
        q = ("SELECT " + key + " FROM " + schema + "." + table +
             " WHERE " + where_clause)
        result = self.__db.query(q).dictresult()
        if len(result) > 0:
            return result[0][key]
        return None

    def get_data_from_table(self, schema, table):
        """Gets all data available in a specific table."""
        return self.__db.query(
            "SELECT * FROM " + schema + "." + table).dictresult()

    def get_id(self, schema, table, where_clause):
        """Return the id of an item in a table.

        If found return id number of found item, else None."""
        q = "SELECT id FROM " + schema + "." + table + " WHERE " + where_clause
        result = self.__db.query(q).dictresult()
        if len(result) > 0:
            return result[0]['id']
        return None

    def delete_from_table(self, schema, table, row):
        """Delete a row element form a specific table."""
        return self.__db.delete(schema + "." + table, row)

    def delete_data_in_table(self, schema, table):
        """Delete all data in a specific table."""
        self.__db.truncate(schema + "." + table, restart=True, cascade=True,
                           only=False)

    def delete_all_data(self):
        """Deletes all data from all existing tables."""
        for table in self.get_tables():
            self.__db.truncate(str(table), restart=True, cascade=True,
                               only=False)

    def get_tables(self):
        """Get all available tables in the database."""
        return self.__db.get_tables()

    def get_attributes(self, schema, table):
        """Get all attributes of a specified table."""
        return self.__db.get_attnames(schema + "." + table)

    def drop_table(self, schema, table):
        """Drops a specified table."""
        self.__db.query("DROP TABLE " + schema + "." + table)

    def drop_all_tables(self):
        """Drops all existing tables."""
        tables = self.get_tables()
        if len(tables) > 0:
            # Single DROP TABLE statement naming every table at once.
            table_names = ", ".join(str(table) for table in tables)
            self.__db.query("DROP TABLE " + table_names)
        else:
            print("Nothing to delete.")

    def get_all(self, schema, table, attribute):
        """Gets one or more attributes of all entries from a specified
        table."""
        query = "SELECT " + attribute + " FROM " + schema + "." + table
        return self.__db.query(query).dictresult()

    def query(self, query):
        """Sends a query to the database.

        Returns dictresult() for result-bearing queries, the raw string
        for string results, and None otherwise.
        """
        result = self.__db.query(query)
        if result is not None:
            if not isinstance(result, str):
                return result.dictresult()
            return result
tree.findall(".//EndTime"), tree.findall(".//WorkPeriod"), tree.findall(".//Expired"), tree.findall(".//Signing"), tree.findall(".//Notification"), tree.findall(".//WorkEventType"), tree.findall(".//Contractor"), tree.findall(".//PermitType"), tree.findall(".//Description")): rowsql = [] for x in a: if x.tag in ('LastUpdated', 'StartTime', 'EndTime') and x.text is not None: y = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(float(x.text) / 1000)) else: y = x.text if x.tag == 'Name': rowsql.append(None) rowsql.append(None) rowsql.append(None) rowsql.append(y) rowsql.append(None) data.append(rowsql) db.truncate('city.restrictions_import') db.inserttable('city.restrictions_import', data) sql = db.query( "DELETE FROM city.restrictions USING city.restrictions_import WHERE city.restrictions.id = city.restrictions_import.id" ) sql = db.query( "INSERT INTO city.restrictions SELECT * FROM city.restrictions_import") db.close()
chain.append(current) root.remove(current) to_visit.extend(list(pairs.groupby('c1').get_group(current)['c2'])) visited.append(current) chains.append(chain) groups = {} count = 1 table = [] for group in chains: for tcl in group: table.append([tcl, count]) count = count + 1 db.truncate('prj_volume.centreline_groups_l2') db.inserttable('prj_volume.centreline_groups_l2', table) group_no_merge = [ x for t in db.query( 'SELECT DISTINCT group_number FROM prj_volume.centreline_groups LEFT JOIN prj_volume.centreline_groups_l2 ON (group_number=l1_group_number) WHERE l2_group_number IS NULL' ).getresult() for x in t ] for tcl in group_no_merge: table.append([tcl, count]) count = count + 1 db.truncate('prj_volume.centreline_groups_l2') db.inserttable('prj_volume.centreline_groups_l2', table) db.close()
# Attach group ids to the events frame and build one summary row per group.
events['group_id'] = group_id
grouped = events.groupby(['group_id'])
event_table = []
# Summary row: group id, representative name/venue/classification taken
# from the group's first member, and the member count.
for (id), group in grouped:
    row = []
    row.append(id)
    row.append(group['event_name'].iloc[0])
    row.append(group['venue_id'].iloc[0])
    row.append(group['classification'].iloc[0])
    row.append(len(group))
    event_table.append(row)
# Drop the columns that were folded into the group summary rows.
del events['venue_id']
del events['classification']
del events['event_name']
# Reorder the remaining detail columns before insertion into
# city.event_details below.
events = events[[
    'group_id', 'event_id', 'start_date', 'start_time', 'end_date',
    'end_time', 'source'
]]
# Row-wise conversions; the cast* helpers are defined earlier in the file
# (not visible here) -- presumably date/time normalizers, TODO confirm.
events['start_date'] = events.apply(caststartdate, axis=1)
events['end_date'] = events.apply(castenddate, axis=1)
events['start_time'] = events.apply(caststarttime, axis=1)
events['end_time'] = events.apply(castendtime, axis=1)
events = events.values.tolist()
# Replace the contents of both tables wholesale, then close the connection.
db.truncate('city.event_groups')
db.truncate('city.event_details')
db.inserttable('city.event_groups', event_table)
db.inserttable('city.event_details', events)
db.close()