class Conectar:
    """Thin convenience wrapper around a PostgreSQL connection.

    Stores the connection parameters and (re)connects before each operation.
    Every helper returns False on failure instead of raising.
    """

    def __init__(self, host, DB, user, password):
        self.host = host
        self.db = DB
        self.user = user
        self.password = password
        self.con = None  # postgres.Postgres instance once Conectar() succeeds

    def Conectar(self) -> bool:
        """Open the connection; True on success, False on failure.

        BUG FIX: the original ``finally: return True`` overrode the except
        branch, so this reported success even when the connection failed.
        """
        try:
            self.con = Postgres(
                f"postgres://{self.user}:{self.password}@{self.host}:5432/{self.db}"
            )
        except Exception:
            return False
        return True

    def select(self, query):
        """Run a SELECT and return all rows, or False on error."""
        try:
            self.Conectar()
            return self.con.all(query)
        except Exception:
            return False

    def update(self, query) -> bool:
        """Run an UPDATE; True on success, False on error.

        BUG FIX: the original ``finally: return True`` made this report
        success even when the statement raised.
        """
        try:
            self.Conectar()
            self.con.run(query)
        except Exception:
            return False
        return True

    def insert(self, query):
        """Run an INSERT; True on success, False on error."""
        try:
            self.Conectar()
            self.con.run(query)
            return True
        except Exception:
            return False

    def login_buscar(self, usuario, senha):
        """Check the user/password pair against the "Funcionario" table.

        SECURITY FIX: the original interpolated ``usuario``/``senha``
        directly into the SQL string (SQL injection); bind parameters
        are used instead.
        """
        try:
            self.Conectar()
            return self.con.all(
                'select Case When "Funcionario".cpf = %(usuario)s'
                ' and "Funcionario".senha = %(senha)s'
                " then 'TRUE' Else 'False' End AS COND"
                ' From "Funcionario";',
                {'usuario': usuario, 'senha': senha},
            )
        except Exception:
            return False
class CardOracleTextProcessor:
    """Pulls legal English cards from the database in batches and fans each
    batch out to a CardProcessorProcess, waiting for a wave of processes to
    finish whenever `processor_limit` of them are outstanding.
    """

    def __init__(self, db_url, batchsize=1000, processor_limit=5):
        self.db_url = db_url
        self.db = Postgres(url=db_url)
        self.batchsize = batchsize
        self.processor_limit = processor_limit
        self.process_count = 0
        self.processors = []  # processes started since the last join-wave
        self.offset = 0       # number of batches already fetched

    def get_all_cards_from_db(self):
        """Fetch the first batch of cards (id + oracle_text)."""
        return self.db.all(
            f"select id,oracle_text from cards where exists( select 1 from jsonb_each_text(cards.legalities) j where j.value not like '%not_legal%') and lang='en' limit {self.batchsize}"
        )

    def get_all_cards_from_db_with_offset(self):
        """Fetch the next batch of cards.

        BUG FIX: `self.offset` counts *batches*, but SQL OFFSET skips *rows*;
        the original passed the batch counter directly, so successive calls
        skipped only 1, 2, 3... rows and re-processed almost every card.
        """
        return self.db.all(
            f"select id,oracle_text from cards where exists( select 1 from jsonb_each_text(cards.legalities) j where j.value not like '%not_legal%') and lang='en' limit {self.batchsize} offset {self.offset * self.batchsize}"
        )

    def limit_processes(self):
        """Join all outstanding processors once a full wave has been started.

        The empty-list guard avoids the spurious "waiting" message the
        original printed on the very first batch (0 % limit == 0).
        """
        if self.processors and len(self.processors) % self.processor_limit == 0:
            print('waiting for processing jobs to finish')
            for processor in self.processors:
                processor.join()
                print(f'{processor.name} finished.')
            self.processors = []

    def setup_and_start_card_processor_process(self, cards):
        """Start a CardProcessorProcess over a copy of `cards` and track it."""
        cards_copy = copy(cards)
        card_preprocessor = CardProcessorProcess(cards=cards_copy, db_url=self.db_url)
        card_preprocessor.start()
        self.processors.append(card_preprocessor)
        return card_preprocessor

    def process_all_cards(self):
        """Main loop: pull batches until the query returns no rows."""
        print(f"Pulling first batch of {self.batchsize} cards from database")
        cards = self.get_all_cards_from_db()
        while len(cards):
            self.offset += 1
            self.limit_processes()
            print('setting up card processing thread')
            self.setup_and_start_card_processor_process(cards)
            # FIX: message previously hard-coded "1000" regardless of batchsize.
            print(f"Pulling next {self.batchsize} cards (batch #{self.offset})")
            cards = self.get_all_cards_from_db_with_offset()
def load_RSS_Feed_DB():
    """Load every RSS feed row from the `feed` table and print the URLs.

    NOTE(review): the collected list is only a local here (never returned
    or stored globally) -- confirm callers rely solely on the printout.
    """
    global STRING_DB
    connection = Postgres(STRING_DB)
    rows = connection.all("SELECT * FROM feed;")
    feed_urls = []
    for row in rows:
        feed_urls.append(row[1])
    print("def load_RSS_Feed_DB():")
    print(feed_urls)
    print("def load_RSS_Feed_DB():")
class TestCursorFactory(WithData):
    """Checks that the default cursor factory yields namedtuple rows."""

    def setUp(self):  # override
        self.db = Postgres(DATABASE_URL)
        fixture_statements = (
            "DROP SCHEMA IF EXISTS public CASCADE",
            "CREATE SCHEMA public",
            "CREATE TABLE foo (bar text, baz int)",
            "INSERT INTO foo VALUES ('buz', 42)",
            "INSERT INTO foo VALUES ('biz', 43)",
        )
        for statement in fixture_statements:
            self.db.run(statement)

    def test_NamedDictCursor_results_in_namedtuples(self):
        """Rows come back as namedtuples with column-named fields."""
        Record = namedtuple("Record", ["bar", "baz"])
        expected = [Record(bar="biz", baz=43), Record(bar="buz", baz=42)]
        rows = self.db.all("SELECT * FROM foo ORDER BY bar")
        assert rows == expected

    def test_namedtuples_can_be_unrolled(self):
        """A single-column SELECT is unrolled to bare values."""
        values = self.db.all("SELECT baz FROM foo ORDER BY bar")
        assert values == [43, 42]
class TestCursorFactory(WithData):
    # NOTE(review): appears byte-identical to an earlier TestCursorFactory in
    # this file; if both live in the same module, the later definition shadows
    # the earlier one -- confirm and remove one copy.

    def setUp(self):  # override
        # Rebuild a clean `public` schema with a two-row fixture table.
        self.db = Postgres(DATABASE_URL)
        self.db.run("DROP SCHEMA IF EXISTS public CASCADE")
        self.db.run("CREATE SCHEMA public")
        self.db.run("CREATE TABLE foo (bar text, baz int)")
        self.db.run("INSERT INTO foo VALUES ('buz', 42)")
        self.db.run("INSERT INTO foo VALUES ('biz', 43)")

    def test_NamedDictCursor_results_in_namedtuples(self):
        # Rows should come back as namedtuples, ordered by `bar`.
        Record = namedtuple("Record", ["bar", "baz"])
        expected = [Record(bar="biz", baz=43), Record(bar="buz", baz=42)]
        actual = self.db.all("SELECT * FROM foo ORDER BY bar")
        assert actual == expected

    def test_namedtuples_can_be_unrolled(self):
        # A single-column SELECT is unrolled to plain values.
        actual = self.db.all("SELECT baz FROM foo ORDER BY bar")
        assert actual == [43, 42]
def sql_listing(param_dict):
    '''
    Run a SQL listing.  The sql file is loaded, then every non-reserved key
    in `param_dict` is treated as a string to substitute into the sql text.
    The SQL is printed, newlines are stripped, and the statement runs in one
    of the allowed modes:
        run : The SQL returns no result
        one : The SQL should return one row result to be printed
        top : The SQL returns many results, print the top N (given by global print_num_rows)
        save: like top, but write the full result to a csv instead
    :param param_dict: dictionary produced by load_and_check_listing_params
    :return:
    '''
    listing_path = '../../listings/chap%d/%s.sql' % (param_dict['chapter'], param_dict['name'])
    with open(listing_path, 'r') as listing_file:
        db = Postgres("postgres://%s:%s@localhost/%s" % (
            os.environ['CHURN_DB_USER'], os.environ['CHURN_DB_PASS'], os.environ['CHURN_DB']))
        # The listing does not name a schema, so prefix the search path.
        sql = "set search_path = '%s'; " % param_dict['schema']
        sql += listing_file.read()
        # Substitute every non-reserved parameter into the sql text.
        for key in param_dict.keys():
            if key not in reserved_param_keywords:
                sql = sql.replace(key, str(param_dict[key]))
        print('SQL:\n----------\n' + sql + '\n----------\nRESULT:')
        sql = sql.replace('\n', ' ')

        mode = param_dict['mode']
        if mode == 'run':
            db.run(sql)
        elif mode == 'one':
            print(db.one(sql))
        elif mode in ('top', 'save'):
            df = pd.DataFrame(db.all(sql))
            if mode == 'save':
                save_path = '../../../fight-churn-output/' + param_dict['schema'] + '/'
                os.makedirs(save_path, exist_ok=True)
                csv_path = save_path + param_dict['schema'] + '_' + \
                    param_dict['name'].replace(param_dict['prefix'], '') + '.csv'
                print('Saving: %s' % csv_path)
                df.to_csv(csv_path, index=False)
            else:
                print(df.head(print_num_rows))
        else:
            print('Unknown run mode for SQL example')
            exit(-4)
def get_important_media_ids(db: postgres.Postgres) -> Set[str]:
    """Collect the media IDs that must be kept: member and room avatars.

    Scans every row of `event_json`, gathering `avatar_url` from
    m.room.member events and `url` from m.room.avatar events, then strips
    each URL to its path (without the leading slash) to form the media ID.
    """
    media_ids = set()
    for event in db.all("SELECT json FROM event_json"):
        data = json.loads(event)
        content = data["content"]
        if data["type"] == "m.room.member":
            avatar_url = content.get("avatar_url")
            if avatar_url:
                media_ids.add(avatar_url)
        elif data["type"] == "m.room.avatar":
            # BUG FIX: an m.room.avatar event is not guaranteed to carry a
            # "url" key (e.g. a removed avatar); indexing it directly raised
            # KeyError and aborted the whole scan.
            url = content.get("url")
            if url:
                media_ids.add(url)
    return set(urllib.parse.urlsplit(url).path[1:] for url in media_ids)
def get_nth_article():
    """Walk every RSS feed, take the 10 newest entries, and push any article
    not yet recorded in the `url` table to Telegraph.

    NOTE(review): this reads `allRssFeed`, which is not assigned here and is
    only created as a *local* inside load_RSS_Feed_DB() -- confirm it is set
    as a module global somewhere before this runs.
    """
    global STRING_DB
    db = Postgres(STRING_DB)
    selectList = db.all("SELECT * FROM url;")
    allUrl = [item[1] for item in selectList]
    for feed in allRssFeed:
        print("parsing entries")
        print(feed)
        entries = feedparser.parse(feed).entries
        # Oldest of the 10 first, so articles are posted in publication order.
        for i in reversed(range(10)):
            try:
                url = entries[i].link
            except Exception as e:
                print("excp1 ", e)
                continue
            if url not in allUrl:
                try:
                    # SECURITY FIX: the original formatted the url into the
                    # SQL string (SQL injection); use a bind parameter.
                    db.run(
                        "INSERT INTO url (url) VALUES (%(url)s) ON CONFLICT (url) DO NOTHING;",
                        {'url': url})
                except Exception as e:
                    print("excp1", e)
                article = Article(url)
                article.download()
                article.parse()
                text = article.text
                articleImage = article.top_image
                articleTitle = article.title
                articleUrl = article.url
                string = text
                # Strip boilerplate header/footer text.
                string = re.sub(r"Zoom © .*[\n]*\(Motorsport-Total\.com\)", "",
                                string)
                string = re.sub(
                    r"[0-9]+\. [A-Za-z]+ [0-9]+ - [0-9]+:[0-9]+ Uhr", "",
                    string)  # drop the date line
                boldArticleContent = ""
                ######
                # MULTITHREADING
                ######
                multithreading = 1
                if multithreading:
                    threading.Thread(target=sendTelegraph,
                                     args=(articleImage, articleTitle,
                                           boldArticleContent, articleUrl,
                                           string, feed)).start()
                else:
                    sendTelegraph(articleImage, articleTitle,
                                  boldArticleContent, articleUrl, string, feed)
def get_important_media_ids(db: postgres.Postgres) -> Set[str]:
    """Return the set of media IDs referenced by member and room avatars.

    Media IDs are the URL paths (leading slash stripped) of the avatar URLs
    found in m.room.member / m.room.avatar events of `event_json`.
    """
    avatar_urls = set()
    for event in db.all("SELECT json FROM event_json"):
        data = json.loads(event)
        content = data["content"]
        type_ = data["type"]
        if type_ == "m.room.member":
            member_avatar = content.get("avatar_url")
            if member_avatar:
                avatar_urls.add(member_avatar)
        elif type_ == "m.room.avatar":
            if "url" in content:
                avatar_urls.add(content["url"])
            else:
                # NOTE(review): stdlib logging uses %-style args, so this
                # {!r} template only renders if `logger` wraps str.format --
                # confirm the logger implementation.
                logger.warning("No URL defined for {!r}-event: {!r}", type_, data)
    return {urllib.parse.urlsplit(u).path[1:] for u in avatar_urls}
def get_local_media_record_ids(db: postgres.Postgres, before: datetime) -> List[str]:
    """Return the IDs of local media created at or before `before`."""
    # created_ts stores milliseconds since the epoch, hence the * 1000.
    cutoff_ms = int(before.timestamp() * 1000)
    return db.all(
        "SELECT media_id FROM local_media_repository WHERE created_ts <= %(before)s",
        before=cutoff_ms,
    )
#!/usr/bin/env python3
# Emit a COPY block that fills course_spaces with numbered, unclaimed spaces
# for every course, suitable for piping into psql.

import uuid
import random
import datetime

from faker import Factory
from postgres import Postgres

fake = Factory.create()
db = Postgres('postgres://luskydive@localhost/luskydive')

print('COPY course_spaces (uuid, course_uuid, number, member_uuid) FROM stdin;')

courses = db.all('SELECT uuid FROM courses ORDER BY date;')
for course in courses:
    # randrange(2, 5) -> 2..4, so each course gets a base of 6-8 spaces.
    num_spaces = 4 + random.randrange(2, 5, 1)
    # NOTE(review): range(0, num_spaces + 1) yields num_spaces + 1 rows --
    # confirm the extra space is intended.
    for i in range(0, num_spaces + 1):
        space_uuid = str(uuid.uuid4())
        member_uuid = "\\N"
        print("%s\t%s\t%d\t%s" % (space_uuid, course, i + 1, member_uuid))

print('\\.')
# CLI: with no arguments, convert BBCode on stdin to Markdown on stdout;
# with arguments, rewrite a whole database column in place:
#   <db_uri> <table> <id_column> <column>
from sys import argv, stdin, stdout

if len(argv) == 1:
    # NOTE(review): on Python 3, stdin.read() returns str (no .decode) and
    # stdout.write rejects bytes -- this branch looks Python-2 era, yet
    # print(..., end="") below is Python 3; confirm the target interpreter.
    stdout.write(bbcode_to_markdown(stdin.read().decode('utf-8', 'replace')).encode('utf-8'))
else:
    from postgres import Postgres
    _, db_uri, db_table, id_column, db_column = argv
    info = {"table": db_table, "column": db_column, "id": id_column}
    db = Postgres(db_uri)
    count = db.one('SELECT COUNT(*) FROM "{table}"'.format(**info))
    query = 'UPDATE "{table}" SET "{column}" = %(markdown)s WHERE "{id}" = %(id)s'.format(**info)
    failures = []
    for i, row in enumerate(db.all('SELECT "{id}", "{column}" FROM "{table}"'.format(**info))):
        # \x1b[0G returns the cursor to column 0: a one-line progress meter.
        print("\x1b[0G{done}/{total}".format(done=i, total=count), end="")
        stdout.flush()
        try:
            db.run(query, {"id": getattr(row, id_column), "markdown": bbcode_to_markdown(getattr(row, db_column))})
        except Exception as e:
            print()
            print(e)
            # Remember the failing row id and keep going.
            failures.append(getattr(row, id_column))
    print()
    if failures:
        print("Failed:")
        print(" ".join(map(str, failures)))
# Generate fake courses per academic year: each course gets a random date in
# the Sep-Jul teaching window and an organiser drawn from committee members
# who already existed at that date.
for year in range(academic_year, academic_year + num_years + 1):
    # Committee eligibility window and the teaching-year date range.
    committee_search_start_date = datetime.datetime(year, 7, 1)
    committee_search_end_date = datetime.datetime(year + 1, 6, 1)
    course_start = datetime.datetime(year, 9, 1)
    course_end = datetime.datetime(year + 1, 7, 1)
    for i in range(0, num_courses):
        course_uuid = str(uuid.uuid4())
        date = fake.date_time_between_dates(datetime_start=course_start, datetime_end=course_end)
        # NOTE(review): both predicates bound created_at from above, and
        # committee_search_start_date is never used -- the first condition
        # was perhaps meant to be a lower bound; confirm.
        potential_organisers = db.all(
            'SELECT uuid FROM committee_members WHERE created_at < %(date)s AND created_at < %(end)s', {
                'date': date,
                'end': committee_search_end_date
            })
        if len(potential_organisers) == 0:
            raise ValueError('Cannot find any committee members from ' + str(date) + ' to ' +
                             str(committee_search_end_date))
        else:
            organiser_uuid = random.sample(potential_organisers, 1)[0]
            # Remove the organiser so they cannot also be the secondary.
            potential_organisers.remove(organiser_uuid)
            secondary_organiser_uuid = "\\N"
            # ~60% of courses get a secondary organiser as well.
            if random.random() > 0.4:
                secondary_organiser_uuid = random.sample(
                    # ...chunk truncated here: the random.sample(...) call
                    # continues beyond this excerpt.
#!/usr/bin/env python3 import uuid import random import datetime from hashlib import sha256 from faker import Factory from postgres import Postgres fake = Factory.create() db = Postgres('postgres://luskydive@localhost/luskydive') print( 'COPY text_messages (uuid, member_uuid, mass_text_uuid, status, to_number, from_number, message, external_id, created_at, updated_at) FROM stdin;' ) mass_texts = db.all('SELECT uuid, template, created_at FROM mass_texts;') for mass_text in mass_texts: start = datetime.datetime(mass_text.created_at.year, 8, 1) end = datetime.datetime(mass_text.created_at.year + 1, 7, 28) phone_number = '+447' + str(random.randrange(100000000, 999999999, 1)) members = db.all( 'SELECT uuid, phone_number, name FROM members WHERE phone_number IS NOT NULL AND created_at > %(start)s AND created_at < %(end)s', { 'start': start, 'end': end }) for member in members: created_at = mass_text.created_at
// {0}.get{1}()""".format(table.java_name_lc, column.java_name)
output += """\n//); } }"""
return output
# NOTE(review): the lines above are the tail of a Java code-generation
# function whose definition (and the opening of its template string) lies
# before this excerpt.

# The contents of db_string.txt should be: postgres://curtin_lims_user:password@localhost/curtin_lims
db_string = ''
with open(script_path + '/db_string.txt') as f:
    db_string = f.readlines()[0]

db = Postgres(db_string)

tables = []
# Every user table in `public`, excluding the PostGIS bookkeeping tables.
table_names = db.all("SELECT table_name FROM INFORMATION_SCHEMA.Tables WHERE table_schema = 'public' AND table_type = 'BASE TABLE' AND table_name NOT IN ('spatial_ref_sys', 'gt_pk_metadata_table')")
# Foreign-key relationships: which table references which, on what column.
reference_descriptions = db.all(""" SELECT kcu.table_name as referencing_table, ccu.table_name as referenced_table, kcu.column_name FROM information_schema.table_constraints AS tc JOIN information_schema.key_column_usage AS kcu ON tc.constraint_name = kcu.constraint_name JOIN information_schema.constraint_column_usage AS ccu ON ccu.constraint_name = tc.constraint_name WHERE constraint_type = 'FOREIGN KEY' AND tc.table_catalog = 'curtin_lims'""")
for table_name in table_names:
    # ...chunk truncated here: the loop body is not visible in this excerpt.
# Export a churn dataset: load the export SQL, substitute the date-range and
# metric bind variables, run the query, and save the result as a csv.
# NOTE(review): `sql`, the *_BIND constants, `args`, `generate_flat_metric_sql`
# and the imports are defined earlier in this file; this excerpt assumes them.
path = '../sql/export_dataset_current.sql'
with open(path , 'r') as myfile:
    sql += myfile.read()

# Fill in the standard bind parameters with the arguments
sql = sql.replace(FRBIND, args.frdt)
sql = sql.replace(TOBIND, args.todt)
sql = sql.replace(INTBIND, args.interval)

# Generate the SQL that flattens the metrics (KEY STEP)
con_string = f"postgresql://localhost/{os.environ['CHURN_DB']}?user={os.environ['CHURN_DB_USER']}&password={os.environ['CHURN_DB_PASS']}"
db = Postgres(con_string)
sql = sql.replace(METRIC_BIND, generate_flat_metric_sql(db, args.schema))

# Print the final SQL, then strip newlines before executing.
print('EXPORT SQL:\n----------\n' + sql + '\n----------\n')
sql = sql.replace('\n', ' ')

# Execute the query and get the result into a data frame
res = db.all(sql)
df = pd.DataFrame(res)

# Save to a csv; the file name depends on the --current flag.
save_path = '../../../fight-churn-output/' + args.schema + '/'
os.makedirs(save_path, exist_ok=True)
if not args.current:
    csv_path = save_path + args.schema + '_dataset.csv'
else:
    csv_path = save_path + args.schema + '_dataset_current.csv'
print('Saving: %s' % csv_path)
df.to_csv(csv_path, index=False)
def get_room_record_ids(db: postgres.Postgres) -> List[str]:
    """Return the id of every room in the `rooms` table."""
    room_ids = db.all("SELECT room_id FROM rooms")
    return room_ids
class TestCache(TestCase):
    # Exercises the query-result cache (the `max_age` argument) of the
    # Postgres wrapper.

    def setUp(self):
        # Single-entry cache + plain-tuple rows, over a tiny fixture table.
        self.db = Postgres(cache=Cache(max_size=1), cursor_factory=SimpleTupleCursor)
        self.db.run("DROP SCHEMA IF EXISTS public CASCADE")
        self.db.run("CREATE SCHEMA public")
        self.db.run("CREATE TABLE foo (key text, value int)")
        self.db.run("INSERT INTO foo VALUES ('a', 1)")
        self.db.run("INSERT INTO foo VALUES ('b', 2)")

    def test_one_returns_cached_row(self):
        # Within max_age the very same row object is returned.
        query = "SELECT * FROM foo WHERE key = 'a'"
        r1 = self.db.one(query, max_age=10)
        r2 = self.db.one(query, max_age=10)
        assert r2 is r1

    def test_all_returns_cached_rows(self):
        # The list is a fresh copy, but the row objects are shared.
        query = "SELECT * FROM foo ORDER BY key"
        r1 = self.db.all(query, max_age=10)
        r2 = self.db.all(query, max_age=10)
        assert r2 == r1
        assert r2 is not r1
        assert r2[0] is r1[0]

    def test_back_as_is_compatible_with_caching(self):
        # One cache entry can be rendered through different back_as types.
        query = "SELECT * FROM foo WHERE key = 'a'"
        r1 = self.db.one(query, back_as=dict, max_age=10)
        r2 = self.db.one(query, back_as=namedtuple, max_age=10)
        assert r1 == r2._asdict()
        rows = self.db.all(query, back_as='Row', max_age=10)
        assert rows == [r1]

    def test_all_returns_row_cached_by_one(self):
        # one() and all() share cache entries for the same query.
        query = "SELECT * FROM foo WHERE key = 'a'"
        row = self.db.one(query, max_age=10)
        rows = self.db.all(query, max_age=10)
        assert rows == [row]
        assert rows[0] is row

    def test_one_raises_TooMany_when_the_cache_contains_multiple_rows(self):
        query = "SELECT * FROM foo"
        rows = self.db.all(query, max_age=10)
        assert len(rows) == 2
        with self.assertRaises(TooMany):
            self.db.one(query, max_age=10)

    def test_cache_max_size(self):
        # max_size=1: caching a second query evicts the first.
        query1 = b"SELECT * FROM foo WHERE key = 'a'"
        query2 = b"SELECT * FROM foo WHERE key = 'b'"
        self.db.all(query1, max_age=10)
        assert set(self.db.cache.entries.keys()) == {query1}
        self.db.all(query2, max_age=10)
        assert set(self.db.cache.entries.keys()) == {query2}

    def test_cache_max_age(self):
        # max_age=0 means "don't reuse": the second call re-queries.
        query = b"SELECT * FROM foo WHERE key = 'a'"
        r1 = self.db.one(query, max_age=0)
        r2 = self.db.one(query, max_age=10)
        assert r2 is not r1

    def test_cache_prune(self):
        # prune() drops expired entries (max_age=-1 is stale immediately).
        self.db.cache.max_size = 2
        query1 = b"SELECT * FROM foo WHERE key = 'a'"
        query2 = b"SELECT * FROM foo WHERE key = 'b'"
        self.db.one(query1, max_age=-1)
        self.db.one(query2, max_age=10)
        assert set(self.db.cache.entries.keys()) == {query1, query2}
        self.db.cache.prune()
        assert set(self.db.cache.entries.keys()) == {query2}

    def test_cache_prevents_concurrent_queries(self):
        # Two threads requesting the same cacheable INSERT while the table is
        # locked must result in only ONE execution once the lock is released.
        with self.db.get_cursor() as cursor:
            cursor.run("LOCK TABLE foo IN EXCLUSIVE MODE")
            def insert():
                self.db.one("INSERT INTO foo VALUES ('c', 3) RETURNING *", max_age=1)
            t1 = Thread(target=insert)
            t2 = Thread(target=insert)
            t1.start()
            t2.start()
            cursor.run("COMMIT")  # this releases the table lock
            t1.join()
            t2.join()
        n = self.db.one("SELECT count(*) FROM foo WHERE key = 'c'")
        assert n == 1
def load_chat_id():
    """Refresh the global `chat_id_List` from the `users` table."""
    global chat_id_List
    global STRING_DB
    connection = Postgres(STRING_DB)
    chat_id_List = connection.all("SELECT chat_id FROM users;")
class PostgreSQLDB(object):
    # Audio-fingerprint store backed by PostgreSQL (dejavu-style schema):
    # a `songs` table plus a `fingerprints` table of (hash, song_id, offset).

    # Column names shared by the SQL templates below.
    FIELD_SONG_ID = 'song_id'
    FIELD_SONGNAME = 'song_name'
    FIELD_OFFSET = 'time'
    FIELD_HASH = 'hash'

    # Keys used in result dictionaries returned to callers.
    SONG_ID = 'song_id'
    SONG_NAME = 'song_name'
    CONFIDENCE = 'confidence'
    MATCH_TIME = 'match_time'
    OFFSET = 'time'
    OFFSET_SECS = 'offset_seconds'

    # Connection handle; assigned in __init__.
    db = None

    # tables
    FINGERPRINTS_TABLENAME = 'fingerprints'
    SONGS_TABLENAME = 'songs'

    # creates
    CREATE_FINGERPRINTS_TABLE = """ CREATE TABLE IF NOT EXISTS "%s"( "%s" INT PRIMARY KEY NOT NULL, "%s" INT NOT NULL, "%s" INT NOT NULL);""" % (FINGERPRINTS_TABLENAME, FIELD_HASH, FIELD_SONG_ID, FIELD_OFFSET)

    CREATE_SONGS_TABLE = \
        """CREATE TABLE IF NOT EXISTS "%s"( "%s" SERIAL PRIMARY KEY , "%s" varchar(250) NOT NULL);""" % \
        (SONGS_TABLENAME, FIELD_SONG_ID, FIELD_SONGNAME)

    SELECT_SONG = """SELECT %s FROM %s WHERE %s = %%s;""" \
        % (FIELD_SONGNAME, SONGS_TABLENAME, FIELD_SONG_ID)

    # inserts fingerprint. Update if existing
    INSERT_FINGERPRINT = \
        """INSERT INTO %s VALUES (%%s, %%s, %%s) ON CONFLICT (%s) DO UPDATE SET %s = EXCLUDED.%s, %s = EXCLUDED.%s;""" \
        % (FINGERPRINTS_TABLENAME, FIELD_HASH, FIELD_SONG_ID, FIELD_SONG_ID, FIELD_OFFSET, FIELD_OFFSET)

    INSERT_SONG = "INSERT INTO %s (%s) VALUES (%%s);" % (SONGS_TABLENAME, FIELD_SONGNAME)

    # The IN (%%s) placeholder is expanded at query time in return_matches.
    SELECT_MULTIPLE = """SELECT %s, %s, %s FROM %s WHERE %s IN (%%s);""" \
        % (FIELD_HASH, FIELD_SONG_ID, FIELD_OFFSET, FINGERPRINTS_TABLENAME, FIELD_HASH)

    def __init__(self, drop_tables=False):
        """Connect (host chosen by environment) and ensure the tables exist."""
        super(PostgreSQLDB, self).__init__()
        if os.environ.get('DOCKERCLOUD_SERVICE_HOSTNAME', None) is not None:
            # Inside Docker Cloud: reach the linked "postgres" service.
            self.db = Postgres(
                u"postgres://*****:*****@postgres/hashes")
        else:
            # self.db = Postgres(u"postgres://*****:*****@localhost/postgres")
            self.db = Postgres(
                u"postgres://*****:*****@pervasivesounds.com/hashes"
            )
        if drop_tables:
            self.db.run("DROP TABLE IF EXISTS %s CASCADE" % self.SONGS_TABLENAME)
            self.db.run("DROP TABLE IF EXISTS %s CASCADE" % self.FINGERPRINTS_TABLENAME)
        self.db.run(self.CREATE_SONGS_TABLE)
        self.db.run(self.CREATE_FINGERPRINTS_TABLE)

    def store(self, name, hashes):
        # Register the song, then bulk-insert its fingerprints.
        sid = self.insert_song(name)
        self.insert_hashes(sid, hashes)

    def insert_hash(self, song_id, hash, offset):
        # Upsert a single (hash, song, offset) fingerprint row.
        self.db.run(self.INSERT_FINGERPRINT, (hash, song_id, offset))

    def insert_hashes(self, sid, hashes):
        # Bulk-insert fingerprints for song `sid` in batches of 1000.
        values = []
        for time_, hash_ in hashes:
            values.append((int(hash_), sid, time_))

        with self.db.get_cursor() as cur:
            for split_values in batch(values, 1000):
                cur.executemany(self.INSERT_FINGERPRINT, split_values)

    def insert_song(self, songname):
        """
        Inserts song in the database and returns the ID of the inserted record.
        """
        self.db.run(self.INSERT_SONG, (songname, ))
        # NOTE(review): "highest id" is racy under concurrent inserts;
        # INSERT ... RETURNING would be safer -- confirm single-writer usage.
        return self.db.one(
            "SELECT %s FROM %s ORDER BY %s DESC LIMIT 1" %
            (self.FIELD_SONG_ID, self.SONGS_TABLENAME, self.FIELD_SONG_ID))

    def get_song_by_id(self, sid):
        """
        Returns song by its ID.
        """
        return self.db.one(self.SELECT_SONG, (int(sid), ))

    def return_matches(self, hashes):
        # Map each hash to its offset in the query audio.
        mapper = {}
        for offset, hash in hashes:
            mapper[int(hash)] = offset

        # Get an iteratable of all the hashes we need
        values = list(mapper.keys())

        res = []
        if hashes is not None:
            for split_values in batch(values, 100):
                query = self.SELECT_MULTIPLE
                # Expand the IN (...) clause to one placeholder per hash.
                query %= ', '.join(["%s"] * len(split_values))
                [
                    res.append(r)
                    for r in self.db.all(query, split_values, back_as=tuple)
                ]
        # Delta between stored offset and query offset = candidate alignment.
        return np.asarray([(sid, offset - mapper[hash])
                           for (hash, sid, offset) in res])

    def get_best_sids(self, matches):
        # The (up to) 20 song ids with the most raw hash matches.
        unique, counts = np.unique(matches[:, 0], return_counts=True)
        return unique[np.argsort(counts)[::-1][:np.minimum(len(counts), 20)]]

    def align_matches(self, matches, sids):
        """
        Finds hash matches that align in time with other matches and finds
        consensus about which hashes are "true" signal from the audio.

        Returns a dictionary with match information.
        """
        # align by diffs
        diff_counter = {}
        largest = 0
        largest_count = 0
        song_id = -1
        for sid in sids:
            # NOTE(review): the inner loop rebinds `sid` to the first column
            # of each (sid, diff) row, shadowing the loop variable above --
            # confirm this is intentional.
            for sid, diff in matches[matches[:, 0] == sid]:
                if sid not in diff_counter:
                    diff_counter[sid] = {}
                if diff not in diff_counter[sid]:
                    diff_counter[sid][diff] = 0
                diff_counter[sid][diff] += 1

                if diff_counter[sid][diff] > largest_count:
                    largest = diff
                    largest_count = diff_counter[sid][diff]
                    song_id = sid

        # total_count = {}
        # for sid in diff_counter.keys():
        #     total_count[sid] = np.sum(diff_counter[sid].values)

        songs = []
        for sid in diff_counter.keys():
            song_name = self.get_song_by_id(sid)
            for diff in diff_counter[sid].keys():
                confidence = diff_counter[sid][diff]
                if confidence > 4:
                    # NOTE(review): 'song_id' here is the overall best id,
                    # not `sid` for this entry -- looks like it should be
                    # `sid`; confirm.
                    songs.append({
                        'song_id': song_id,
                        'song_name': song_name,
                        'confidence': confidence,
                        'offset': diff
                    })
        return songs
# Generate 100 fake API keys, each owned by a randomly chosen committee
# member, and print the INSERT statements to feed into psql.
fake = Factory.create()
db = Postgres('postgres://luskydive@localhost/luskydive')

num_api_keys = 100
created_start = datetime.datetime(2008, 9, 1)
created_end = datetime.datetime(2017, 9, 1)

for _ in range(num_api_keys):
    key_uuid = str(uuid.uuid4())
    # Keys live between 18 and 42 hours.
    lifetime = datetime.timedelta(hours=random.random() * 24 + 18)
    created_at = fake.date_time_between_dates(datetime_start=created_start,
                                              datetime_end=created_end)
    expires_at = fake.date_time_between_dates(datetime_start=created_at,
                                              datetime_end=created_at + lifetime)
    candidates = db.all('SELECT uuid FROM committee_members;')
    if len(candidates) == 0:
        raise ValueError('Cannot find any committee members.')
    owner_uuid = random.sample(candidates, 1)[0]
    print(
        'INSERT INTO api_keys (uuid, committee_member_uuid, created_at, expires_at) VALUES(\'%s\', \'%s\', \'%s\', \'%s\');'
        % (key_uuid, owner_uuid,
           created_at.strftime('%Y-%m-%d %H:%M:%S'),
           expires_at.strftime('%Y-%m-%d %H:%M:%S')))
#!/usr/bin/env python3
# Randomly assign members to ~60% of course spaces, printing the UPDATE
# statements to run against the database.

import uuid
import random
import datetime

from faker import Factory
from postgres import Postgres

fake = Factory.create()
db = Postgres('postgres://luskydive@localhost/luskydive')

update_query = 'UPDATE course_spaces SET member_uuid = \'%s\' WHERE uuid = \'%s\';'

spaces = db.all('SELECT course_spaces.uuid, courses.date FROM courses INNER JOIN course_spaces ON courses.uuid = course_spaces.course_uuid ORDER BY date;')
for space in spaces:
    # Leave roughly 40% of the spaces unclaimed.
    if random.random() <= 0.4:
        continue
    # Candidates: members who joined before the course, within 9 months.
    potential_members = db.all('SELECT uuid FROM members WHERE created_at < %(date)s AND date_part(\'month\', age(%(date)s, created_at)) <= 9', {'date': space.date})
    member_uuid = random.sample(potential_members, 1)[0]
    print(update_query % (member_uuid, str(space.uuid)))
def sql_listing(chapter, listing, name, schema, mode, param_dict, insert=False, save_ext=None):
    """
    Run a SQL listing. The sql file is loaded, and then any non-reserved
    keyword in the parameters is treated as a string to be replaced in the
    sql string. The SQL is then printed out.

    :param chapter: chapter number, used to locate the listing .sql file
    :param listing: listing number within the chapter
    :param name: listing name (also used to derive the csv name in save mode)
    :param schema: database schema set as the search path
    :param mode: The allowed modes are:
        run : The SQL returns no result
        one : The SQL should return one row result to be printed
        top : The SQL returns many results, print the top N (given by global print_num_rows)
        save: like top, but write the full result to a csv instead
    :param param_dict: the bind variables as a dictionary
    :param insert: flag to use the insert form of a query; see chapter 7
    :param save_ext: optional extra suffix for the saved csv file name
    :return:
    """
    with open(
            '../../listings/chap%d/%s.sql' %
        (chapter, _full_listing_name(chapter, listing, name, insert)),
            'r') as myfile:
        db = Postgres("postgres://%s:%s@localhost/%s" %
                      (os.environ['CHURN_DB_USER'],
                       os.environ['CHURN_DB_PASS'], os.environ['CHURN_DB']))
        # prefix the search path onto the listing, which does not specify the schema
        sql = "set search_path = '%s'; " % schema
        # load the sql file
        sql = sql + myfile.read()
        # bind any parameters that match strings in the sql
        param_keys = [
            p for p in param_dict.keys() if p not in reserved_param_keywords
        ]
        for p in param_keys:
            sql = sql.replace(p, str(param_dict[p]))
        # Print the sql (then remove the newlines)
        print('SQL:\n----------\n' + sql + '\n----------\nRESULT:')
        sql = sql.replace('\n', ' ')
        # Run in the manner indicated by the mode
        if mode == 'run':
            db.run(sql)
        elif mode == 'one':
            res = db.one(sql)
            print(res)
        elif mode == 'top' or mode == 'save':
            res = db.all(sql)
            df = pd.DataFrame(res)
            if mode == 'save':
                save_path = '../../../fight-churn-output/' + schema + '/'
                os.makedirs(save_path, exist_ok=True)
                # Strip the listing prefix to form the csv file name.
                csv_path = save_path + schema + '_' + name.replace(
                    'listing_{}_{}_'.format(chapter, listing), '')
                if save_ext:
                    csv_path = csv_path + '_' + save_ext
                csv_path = csv_path + '.csv'
                print('Saving: %s' % csv_path)
                df.to_csv(csv_path, index=False)
            else:
                print(df.head(print_num_rows))
        else:
            print('Unknown run mode for SQL example')
            exit(-4)
output += """\n//); } }"""
return output
# NOTE(review): the two statements above are the tail of a code-generation
# function whose definition lies before this excerpt.

# The contents of db_string.txt should be: postgres://curtin_lims_user:password@localhost/curtin_lims
db_string = ''
with open(script_path + '/db_string.txt') as f:
    db_string = f.readlines()[0]

db = Postgres(db_string)

tables = []
# Every user table in `public`, excluding the PostGIS bookkeeping tables.
table_names = db.all(
    "SELECT table_name FROM INFORMATION_SCHEMA.Tables WHERE table_schema = 'public' AND table_type = 'BASE TABLE' AND table_name NOT IN ('spatial_ref_sys', 'gt_pk_metadata_table')"
)
# Foreign-key relationships: which table references which, on what column.
reference_descriptions = db.all(""" SELECT kcu.table_name as referencing_table, ccu.table_name as referenced_table, kcu.column_name FROM information_schema.table_constraints AS tc JOIN information_schema.key_column_usage AS kcu ON tc.constraint_name = kcu.constraint_name JOIN information_schema.constraint_column_usage AS ccu ON ccu.constraint_name = tc.constraint_name WHERE constraint_type = 'FOREIGN KEY' AND tc.table_catalog = 'curtin_lims'""")