def load_User_Me():
    global STRING_DB
    global MY_CHAT_ID_TELEGRAM
    db = Postgres(STRING_DB)
    db.run(
        "INSERT INTO users (chat_id,name,time_added) VALUES ('{}','{}','{}') ON CONFLICT (chat_id) DO NOTHING;"
        .format(MY_CHAT_ID_TELEGRAM, "@f126ck", "1503407762"))

class PSQL(object):
    def __init__(self, username, password, hostname, hostport, database):
        self.db = Postgres("postgres://{}:{}@{}:{}/{}".format(
            username, password, hostname, hostport, database))

    def cursor(self, sql):
        self.db.run(sql)

def dump_db():
    """Save the data to the bz_info table in the Postgres DB.

    The connection behind "postgresql://*****:*****@server_running_postgres/bz" is:
        Username ........: bz_user
        Password ........: bz_user_00
        Server FQDN .....: server_running_postgres
        Database ........: bz
    """
    print("Updating DB")
    db = Postgres(url='postgresql://*****:*****@server_running_postgres/bz')
    db.run("delete from bz_info")
    for item in bz_list:
        print("ID here: {}".format(item[1]))
        db.run("insert into bz_info values('" + TODAY_STR + "','" +
               str(item[1]) + "','" + str(item[2]) + "','" + str(item[3]) +
               "','" + str(item[4]) + "','" + str(item[5]) + "','" +
               str(item[6]) + "','" + str(item[7]) + "','" + str(item[8]) +
               "','" + str(item[9]) + "','" + str(item[10]) + "','" +
               str(item[11]) + "','" + str(item[12]) + "')")

class db():
    "Wrapper for postgres https://postgres-py.readthedocs.org/en/2.1.2/index.html"

    def __init__(self, user=user, schema=schema):
        self.schema = schema
        self.host = 'postgres://%s@localhost/%s' % (user, schema)
        self.path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), ".."))

    def connect(self):
        print('Connecting to %s ...' % self.host)
        self.db_conn = Postgres(self.host)

    def init(self):
        print(self.schema)
        try:
            # Enable PostGIS (includes raster)
            self.db_conn.run("CREATE EXTENSION postgis")
            # Enable Topology
            self.db_conn.run("CREATE EXTENSION postgis_topology")
            # Fuzzy matching needed for Tiger
            self.db_conn.run("CREATE EXTENSION fuzzystrmatch")
            # Enable US Tiger Geocoder
            self.db_conn.run("CREATE EXTENSION postgis_tiger_geocoder")
        except Exception:
            # catch-all: the extensions already exist
            print('PostGIS is already installed')
        query_hotspot = "CREATE TABLE IF NOT EXISTS %s.hotspot (gid serial, latitude numeric, longitude numeric, brightness numeric, scan numeric, track numeric, acq_date date, acq_time varchar(5), satellite varchar(1), confidence int4, version varchar(3), bright_t31 numeric, frp numeric, geom GEOGRAPHY(POINT, 4326), PRIMARY KEY(gid))" % self.schema
        query_country = "CREATE TABLE IF NOT EXISTS %s.country (gid serial, fips varchar(2), iso2 varchar(2), iso3 varchar(3), un int4, name varchar(50), area int4, pop2005 int4, region int4, subregion int4, lon float8, lat float8, geom GEOGRAPHY(MULTIPOLYGON, 4326), PRIMARY KEY(gid))" % self.schema
        # create default tables
        self.db_conn.run(query_hotspot)
        self.db_conn.run(query_country)

    def register(self):
        ''
        # self.db_conn.register_model(Hotspot)

    def parseShape(self):
        path = "%s/global" % self.path
        ts = int(time.time())
        # here we run two transactions from the parsed files
        for cmd in [{'db': 'hotspot', 'parse': 'Global_24h.shp'},
                    {'db': 'country', 'parse': 'TM_WORLD_BORDERS-0.3.shp'}]:
            run = '/usr/local/bin/shp2pgsql -a -D -s 4326 -W latin1 -i %s/%s %s.%s | /usr/local/bin/psql -d %s' % (
                path, cmd['parse'], self.schema, cmd['db'], self.schema)
            p = subprocess.Popen(run, stdout=subprocess.PIPE, shell=True)
            output, err = p.communicate()
            print(output)

def insert_RSS_Feed_DB():
    global STRING_DB
    db = Postgres(STRING_DB)
    url = 'http://www.motorsport-total.com/rss_f1.xml'
    db.run(
        "INSERT INTO feed (url) VALUES ('{}') ON CONFLICT (url) DO NOTHING;".format(url))
    url = 'http://www.motorsport-total.com/rss_motorrad_MGP.xml'
    db.run(
        "INSERT INTO feed (url) VALUES ('{}') ON CONFLICT (url) DO NOTHING;".format(url))

class Database:
    def __init__(self):
        _user = getenv("DATABASE.USER")
        _secret = getenv("DATABASE.PASS")
        _host = getenv("DATABASE.HOST")
        _port = getenv("DATABASE.PORT")
        _dbname = getenv("DATABASE.DB")
        self.db = Postgres(
            url=f"postgresql://{_user}:{_secret}@{_host}:{_port}/{_dbname}")

    def create_table(self, table: TableBase):
        self.db.run(table.create_sql(drop=True))

def remove_obsevations(schema):
    '''
    Truncate the active_period and observation tables to prepare for a new dataset generation.
    :param schema:
    :return:
    '''
    print('Removing old active_period and observation entries...')
    con_string = f"postgresql://localhost/{os.environ['CHURN_DB']}?user={os.environ['CHURN_DB_USER']}&password={os.environ['CHURN_DB_PASS']}"
    db = Postgres(con_string)
    db.run('truncate table %s.active_period' % schema)
    db.run('truncate table %s.observation' % schema)

def remove_obsevations(schema):
    '''
    Truncate the active_period and observation tables to prepare for a new dataset generation.
    :param schema:
    :return:
    '''
    print('Removing old active_period and observation entries...')
    db = Postgres("postgres://%s:%s@localhost/%s" %
                  (os.environ['CHURN_DB_USER'], os.environ['CHURN_DB_PASS'],
                   os.environ['CHURN_DB']))
    db.run('truncate table %s.active_period' % schema)
    db.run('truncate table %s.observation' % schema)

class Conectar:
    def __init__(self, host, DB, user, password):
        self.host = host
        self.db = DB
        self.user = user
        self.password = password
        self.con = None

    def Conectar(self) -> bool:
        try:
            self.con = Postgres(
                f"postgres://{self.user}:{self.password}@{self.host}:5432/{self.db}"
            )
        except Exception:
            # connection failed
            return False
        return True

    def select(self, query):
        try:
            self.Conectar()
            valores = self.con.all(query)
            return valores
        except Exception:
            return False

    def update(self, query) -> bool:
        try:
            self.Conectar()
            self.con.run(query)
        except Exception:
            return False
        return True

    def insert(self, query):
        try:
            self.Conectar()
            self.con.run(query)
            return True
        except Exception:
            return False

    def login_buscar(self, usuario, senha):
        try:
            self.Conectar()
            valores = self.con.all(
                f'select Case When "Funcionario".cpf = \'{usuario}\' and "Funcionario".senha = \'{senha}\' then \'TRUE\' Else \'False\' End AS COND From "Funcionario";'
            )
            return valores
        except Exception:
            return False

def sql_listing(param_dict):
    '''
    Run a SQL listing. The sql file is loaded, and then any non-reserved keyword in the parameters is treated as a
    string to be replaced in the sql string. The SQL is then printed out, before newlines are removed, and then run
    in one of the allowed modes. The allowed modes are:
        run : The SQL returns no result
        one : The SQL should return one row result to be printed
        top : The SQL returns many results, print the top N (given by global print_num_rows)
    :param param_dict: dictionary produced by load_and_check_listing_params
    :return:
    '''
    with open('../../listings/chap%d/%s.sql' % (param_dict['chapter'], param_dict['name']), 'r') as myfile:
        db = Postgres("postgres://%s:%s@localhost/%s" %
                      (os.environ['CHURN_DB_USER'], os.environ['CHURN_DB_PASS'],
                       os.environ['CHURN_DB']))
        # prefix the search path onto the listing, which does not specify the schema
        sql = "set search_path = '%s'; " % param_dict['schema']
        # load the sql file
        sql = sql + myfile.read()
        # bind any parameters that match strings in the sql
        param_keys = [p for p in param_dict.keys() if p not in reserved_param_keywords]
        for p in param_keys:
            sql = sql.replace(p, str(param_dict[p]))
        # Print the sql (then remove the newlines)
        print('SQL:\n----------\n' + sql + '\n----------\nRESULT:')
        sql = sql.replace('\n', ' ')
        # Run in the manner indicated by the mode
        if param_dict['mode'] == 'run':
            db.run(sql)
        elif param_dict['mode'] == 'one':
            res = db.one(sql)
            print(res)
        elif param_dict['mode'] == 'top' or param_dict['mode'] == 'save':
            res = db.all(sql)
            df = pd.DataFrame(res)
            if param_dict['mode'] == 'save':
                save_path = '../../../fight-churn-output/' + param_dict['schema'] + '/'
                os.makedirs(save_path, exist_ok=True)
                csv_path = save_path + param_dict['schema'] + '_' + \
                    param_dict['name'].replace(param_dict['prefix'], '') + '.csv'
                print('Saving: %s' % csv_path)
                df.to_csv(csv_path, index=False)
            else:
                print(df.head(print_num_rows))
        else:
            print('Unknown run mode for SQL example')
            exit(-4)

class WithCursorFactory(WithSchema):
    def setUp(self):  # override
        self.db = Postgres(cursor_factory=self.cursor_factory)
        self.db.run("DROP SCHEMA IF EXISTS public CASCADE")
        self.db.run("CREATE SCHEMA public")
        self.db.run("CREATE TABLE foo (key text, value int)")
        self.db.run("INSERT INTO foo VALUES ('buz', 42)")
        self.db.run("INSERT INTO foo VALUES ('biz', 43)")

def get_nth_article():
    global STRING_DB
    db = Postgres(STRING_DB)
    selectList = db.all("SELECT * FROM url;")
    allUrl = [item[1] for item in selectList]
    for feed in allRssFeed:
        print("parsing entries")
        print(feed)
        entries = feedparser.parse(feed).entries
        for i in reversed(range(10)):
            try:
                url = entries[i].link
            except Exception as e:
                print("excp1 ", e)
                continue
            if url not in allUrl:
                try:
                    db.run(
                        "INSERT INTO url (url) VALUES ('{}') ON CONFLICT (url) DO NOTHING;"
                        .format(url))
                except Exception as e:
                    print("excp1", e)
                article = Article(url)
                article.download()
                article.parse()
                text = article.text
                articleImage = article.top_image
                articleTitle = article.title
                articleUrl = article.url
                string = text
                # remove the watermark/credit line
                string = re.sub(r"Zoom © .*[\n]*\(Motorsport-Total\.com\)", "", string)
                # remove the date line
                string = re.sub(r"[0-9]+\. [A-Za-z]+ [0-9]+ - [0-9]+:[0-9]+ Uhr", "", string)
                boldArticleContent = ""
                # MULTITHREADING
                multithreading = 1
                if multithreading:
                    threading.Thread(target=sendTelegraph,
                                     args=(articleImage, articleTitle,
                                           boldArticleContent, articleUrl,
                                           string, feed)).start()
                else:
                    sendTelegraph(articleImage, articleTitle, boldArticleContent,
                                  articleUrl, string, feed)

class db():
    "Wrapper for postgres https://postgres-py.readthedocs.org/en/2.1.2/index.html"

    def __init__(self, password=password, user=user, dbname=dbname):
        self.schema = schema
        self.dbname = dbname
        self.host = 'postgres://%s:%s@localhost/%s' % (user, password, dbname)
        self.path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))

    def connect(self):
        print('Connecting to %s ...' % self.host)
        self.db_conn = Postgres(self.host)

    def init(self):
        print(self.schema)
        try:
            # Enable PostGIS (includes raster)
            self.db_conn.run("CREATE EXTENSION postgis")
            # Enable Topology
            self.db_conn.run("CREATE EXTENSION postgis_topology")
            # Fuzzy matching needed for Tiger
            self.db_conn.run("CREATE EXTENSION fuzzystrmatch")
            # Enable US Tiger Geocoder
            self.db_conn.run("CREATE EXTENSION postgis_tiger_geocoder")
        except Exception:
            # catch-all: the extensions already exist
            print('PostGIS is already installed')
        query_hotspot = "CREATE TABLE IF NOT EXISTS %s.hotspot (gid serial, latitude numeric, longitude numeric, brightness numeric, scan numeric, track numeric, acq_date date, acq_time varchar(5), satellite varchar(1), confidence int4, version varchar(3), bright_t31 numeric, frp numeric, geom GEOGRAPHY(POINT, 4326), PRIMARY KEY(gid))" % self.schema
        query_country = "CREATE TABLE IF NOT EXISTS %s.country (gid serial, fips varchar(2), iso2 varchar(2), iso3 varchar(3), un int4, name varchar(50), area int4, pop2005 int4, region int4, subregion int4, lon float8, lat float8, geom GEOGRAPHY(MULTIPOLYGON, 4326), PRIMARY KEY(gid))" % self.schema
        # create default tables
        self.db_conn.run(query_hotspot)
        self.db_conn.run(query_country)

    def register(self):
        ''
        # self.db_conn.register_model(Hotspot)

    def parseShape(self):
        path = "%s/global" % self.path
        ts = int(time.time())
        # here we run two transactions from the parsed files
        for cmd in [{'db': 'hotspot', 'parse': 'Global_24h.shp'},
                    {'db': 'country', 'parse': 'TM_WORLD_BORDERS-0.3.shp'}]:
            run = '/usr/local/bin/shp2pgsql -a -D -s 4326 -W latin1 -i %s/%s %s.%s | /usr/bin/psql -d %s' % (
                path, cmd['parse'], self.schema, cmd['db'], self.dbname)
            p = subprocess.Popen(run, stdout=subprocess.PIPE, shell=True)
            output, err = p.communicate()
            print(output)

class PostgresHelper:
    # Helper class for PostgreSQL, mainly for running stored procedures
    def __init__(self, server, dbname, username):
        self.server = server
        self.dbname = dbname
        self.username = username
        self.db = Postgres("postgres://" + self.username + "@" + self.server +
                           "/" + self.dbname)

    def executeCommand(self, commandStr):
        self.db.run(sql=commandStr)

    def getMultipleCursor(self, spname, parameters, refcursors):
        paramstr = ""
        cursorstr = ""
        for refcursor in refcursors:
            cursorstr += "'" + refcursor + "',"
        for param in parameters:
            paramstr += param + ","
        if paramstr.endswith(','):
            paramstr = paramstr[:-1]
        if cursorstr.endswith(','):
            cursorstr = cursorstr[:-1]
        data = {}
        with self.db.get_cursor() as cursor:
            cursor.run("select " + spname + "(" + paramstr + "," + cursorstr + ");")
            for refcursor in refcursors:
                fetchstr = 'FETCH ALL IN "' + refcursor + '";'
                tempdata = cursor.all(fetchstr)
                # print(tempdata)
                data[refcursor] = tempdata
        return data
        # kayit = self.db.run(sql="select "+spname+"("+paramstr+","+cursorstr+");")

    def getSingleCursor(self, spname, parameters):
        return self.getMultipleCursor(spname=spname, parameters=parameters,
                                      refcursors=["rc1"])

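# Hypothetical usage sketch (not part of the original snippet; the procedure name and
# values are assumptions). Parameters are passed as pre-quoted SQL literal strings
# because the helper concatenates them directly into the SELECT, and the stored
# procedure is expected to open the named refcursor ("rc1") itself.
# helper = PostgresHelper("localhost", "mydb", "myuser")
# rows = helper.getSingleCursor("my_stored_procedure", ["'2024-01-01'", "42"])
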
class CardProcessorProcess(Process):
    def __init__(self, cards, db_url):
        super().__init__()
        self.db = Postgres(url=db_url)
        self.cards = cards

    def info(self, msg):
        print(f'card processor process {os.getpid()}: {msg}')

    def _insert_card_cost_and_effect(self, card_id, cost, effect):
        self.db.run(
            "INSERT INTO actions(card_id, cost, effect) VALUES(%(card_id)s, %(action_cost)s, %(action_effect)s)",
            {
                'card_id': card_id,
                'action_cost': cost,
                'action_effect': effect
            })

    def get_parsed_oracle_text_from_card(self, card):
        oracle_text = card.oracle_text
        if oracle_text is None:
            oracle_text = ''
        return OracleTextParser.parse_oracle_text(oracle_text)

    def parse_and_save_card_in_db(self, card):
        parsed_oracle_text = self.get_parsed_oracle_text_from_card(card)
        card_id = card.id
        for action in parsed_oracle_text:
            action_cost = json.dumps(str(action['cost']))
            action_effect = json.dumps(str(action['effect']))
            self._insert_card_cost_and_effect(card_id, action_cost, action_effect)

    def run(self):
        cards = self.cards
        self.info('Extracting cost and effect from each card and writing to db')
        for card_index in range(len(cards)):
            if card_index % 100 == 0:  # throttle progress report from thread
                self.info(f'{math.floor((card_index / 1000) * 100)}%')
            card = cards[card_index]
            self.parse_and_save_card_in_db(card)

def setup_churn_db(schema_name):
    con_string = f"postgresql://localhost/{os.environ['CHURN_DB']}?user={os.environ['CHURN_DB_USER']}&password={os.environ['CHURN_DB_PASS']}"
    db = Postgres(con_string)
    tables = [
        'event', 'subscription', 'event_type', 'metric', 'metric_name',
        'active_period', 'observation', 'active_week', 'account'
    ]
    print('Creating schema %s (if not exists)...' % schema_name)
    db.run('CREATE SCHEMA IF NOT EXISTS %s;' % schema_name)
    for t in tables:
        file_root = os.path.abspath(os.path.dirname(__file__))
        with open('%s/schema/create_%s.sql' % (file_root, t), 'r') as sqlfile:
            sql = sqlfile.read().replace('\n', ' ')
            sql = sql.replace('x.', '%s.' % schema_name)
            print('Creating table %s (if not exists)' % t)
            db.run(sql)

def connect():
    """ Connect to the PostgreSQL database server """
    try:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        # use a postgres connection string below
        db = Postgres(
            "host=localhost dbname=michaelmostachetti user=michaelmostachetti password=password"
        )
        # This is where you will run your four queries
        # You can use either .one(), or .run() depending if you care
        # about the return values of the queries
        # Query 1
        result1 = db.one("SELECT version();")
        print(result1)
        # Query 2
        db.run("SELECT version();")
        # Query 3
        db.run("SELECT version();")
        # Query 4
        db.run("SELECT version();")
    except:
        print("An error occurred")

class WithSchema(TestCase):
    def setUp(self):
        self.db = Postgres(DATABASE_URL, cursor_factory=SimpleDictCursor)
        self.db.run("DROP SCHEMA IF EXISTS public CASCADE")
        self.db.run("CREATE SCHEMA public")

    def tearDown(self):
        self.db.run("DROP SCHEMA IF EXISTS public CASCADE")
        del self.db

class WithSchema(TestCase):
    def setUp(self):
        self.db = Postgres()
        self.db.run("DROP SCHEMA IF EXISTS public CASCADE")
        self.db.run("CREATE SCHEMA public")

    def tearDown(self):
        self.db.run("DROP SCHEMA IF EXISTS public CASCADE")
        del self.db

class TestCursorFactory(WithData):
    def setUp(self):  # override
        self.db = Postgres(DATABASE_URL)
        self.db.run("DROP SCHEMA IF EXISTS public CASCADE")
        self.db.run("CREATE SCHEMA public")
        self.db.run("CREATE TABLE foo (bar text, baz int)")
        self.db.run("INSERT INTO foo VALUES ('buz', 42)")
        self.db.run("INSERT INTO foo VALUES ('biz', 43)")

    def test_NamedDictCursor_results_in_namedtuples(self):
        Record = namedtuple("Record", ["bar", "baz"])
        expected = [Record(bar="biz", baz=43), Record(bar="buz", baz=42)]
        actual = self.db.all("SELECT * FROM foo ORDER BY bar")
        assert actual == expected

    def test_namedtuples_can_be_unrolled(self):
        actual = self.db.all("SELECT baz FROM foo ORDER BY bar")
        assert actual == [43, 42]

def init_DB():
    global STRING_DB
    db = Postgres(STRING_DB)
    db.run(
        "CREATE TABLE IF NOT EXISTS url (id serial PRIMARY KEY, url varchar(100) unique);"
    )
    db.run(
        "CREATE TABLE IF NOT EXISTS feed (id serial PRIMARY KEY, url varchar(100) unique);"
    )
    db.run(
        "CREATE TABLE IF NOT EXISTS users (id serial PRIMARY KEY, chat_id int unique, name varchar(50), time_added varchar(20));"
    )

def delete_local_media_record(db: postgres.Postgres, media_id: str) -> None:
    db.run(
        "DELETE FROM local_media_repository WHERE media_id = %(media_id)s",
        media_id=media_id,
    )

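# A minimal alternative sketch (assumption, not from the original source): the same
# delete expressed with postgres.py's documented parameters-mapping form, where the
# bind values are passed as a dict in the second positional argument.
def delete_local_media_record_with_mapping(db: postgres.Postgres, media_id: str) -> None:
    db.run(
        "DELETE FROM local_media_repository WHERE media_id = %(media_id)s",
        {"media_id": media_id},
    )
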
if validation_error:
    sys.exit()
eval(eval_string)
samples.append(sample)

# Register and insert into database in groups of 10:
sample_groups = izip_longest(*(iter(samples),) * 10)
db = Postgres(postgres_connection_string)
for sample_group in sample_groups:
    sample_group = list(filter(None, sample_group))
    igsns = igsnClient.register_samples(sample_group)
    insert_sql = "INSERT INTO geosample (mount_type_id, igsn, geosample_name, location) VALUES"
    for i in range(0, len(igsns)):
        # Insert geosample record into database
        sample = sample_group[i]
        igsn = igsns[i]
        # WARNING: note that mount_type_id is not being set. All of this
        # information should be in the IGSN record anyway.
        db.run(insert_sql +
               "(1, %(igsn)s, %(geosample_name)s, ST_PointFromText('POINT({0} {1})', 4326))".format(
                   sample.get_longitude(), sample.get_latitude()),
               {'igsn': igsn, 'geosample_name': sample.get_name()})
        print(igsn)
del db

class MetricCalculator:
    def __init__(self, schema):
        '''
        Initialize metric calculator from schema name. Loads parameter json from the adjacent conf directory.
        Loads date range from the configuration. Makes postgres connection with environment variables.
        :param schema:
        '''
        with open('../conf/%s_metrics.json' % schema, 'r') as myfile:
            self.metric_dict = json.loads(myfile.read())
        self.schema = schema
        self.from_date = self.metric_dict['date_range']['from_date']
        self.to_date = self.metric_dict['date_range']['to_date']
        self.non_metrics = ('date_range', 'event_properties')
        self.URI = "postgres://%s:%s@localhost/%s" % (
            os.environ['CHURN_DB_USER'], os.environ['CHURN_DB_PASS'],
            os.environ['CHURN_DB'])
        self.db = Postgres(self.URI)
        with open('../sql/qa_metric.sql', 'r') as myfile:
            self.qa_sql = myfile.read().replace('\n', ' ')

    def remove_old_metrics_from_db(self, run_mets=None, no_warn=False):
        '''
        Delete values of existing metrics. If no metrics are specified, it truncates the metric table.
        Otherwise just delete the specified metrics.
        :param run_mets: list of strings, metric names; or else None meaning truncate all metrics
        :return:
        '''
        if run_mets is None:
            print('TRUNCATING *Metrics* in schema -> %s <- ...' % schema)
            if not no_warn and input(
                    "are you sure? (enter %s to proceed) " % schema) != schema:
                exit(0)
            self.db.run('truncate table %s.metric' % schema)
            self.db.run('truncate table %s.metric_name' % schema)
        else:
            if isinstance(run_mets, str):
                run_mets = [run_mets]
            if len(run_mets) > 1 and not no_warn:
                print('DELETING * %d * Metrics in schema -> %s <- ...' %
                      (len(run_mets), schema))
                if input("are you sure? (enter %s to proceed) " % schema) != schema:
                    exit(0)
            for m in run_mets:
                id = self.get_metric_id(m)
                if id is not None:
                    deletSql = "delete from %s.metric where metric_name_id=%d and metric_time between '%s'::timestamp and '%s'::timestamp" \
                        % (schema, id, self.from_date, self.to_date)
                    print('Clearing old values: ' + deletSql)
                    self.db.run(deletSql)

    def get_metric_id(self, metric_name):
        '''
        Get the id of one metric from the database by name
        :param metric_name: string name of the metric
        :return: id number of the metric, assuming one was found; or else SQL returns NULL as None in Python
        '''
        sql = "select metric_name_id from %s.metric_name where metric_name='%s'" % (
            self.schema, metric_name)
        return self.db.one(sql)

    def add_metric_id(self, metric):
        '''
        Add an id for a metric if it doesn't already exist
        :param metric: string name of the metric
        :return:
        '''
        id = self.get_metric_id(metric)
        if id is None:
            id = self.db.one('select max(metric_name_id)+1 from %s.metric_name' % schema)
            if id is None:
                id = 0
            insertNameSql = "insert into %s.metric_name (metric_name_id,metric_name) values (%d,'%s')" % (
                schema, id, metric)
            self.db.run(insertNameSql)
            print('Inserted metric %s.%s as id %d' % (schema, metric, id))
        return id

    def metric_qa_plot(self, metric, args):
        save_path = '../../../fight-churn-output/' + self.schema + '/'
        os.makedirs(save_path, exist_ok=True)
        print('Checking metric %s.%s' % (self.schema, metric))
        id = self.get_metric_id(metric)
        if id is None:
            print("No ID found for metric %s" % metric)
            return
        aSql = self.qa_sql.replace('%metric_name_id', str(id))
        aSql = aSql.replace('%schema', self.schema)
        aSql = aSql.replace('%from_date', self.from_date)
        aSql = aSql.replace('%to_date', self.to_date)
        print(aSql)
        res = pandas.read_sql_query(aSql, self.URI)
        if res.shape[0] == 0 or res['avg_val'].isnull().values.all():
            print('\t*** No result for %s' % metric)
            return
        cleanedName = ''.join(e for e in metric if e.isalnum())
        # res.to_csv(save_path + cleanedName + '_metric_qa.csv', index=False)  # uncomment to save details
        plt.figure(figsize=(8, 10))
        plt.subplot(4, 1, 1)
        plt.plot('calc_date', 'max_val', data=res, marker='', color='red',
                 linewidth=2, label="max")
        if args.hideax:
            plt.gca().get_xaxis().set_visible(False)  # Hiding x axis labels on the count
        plt.ylim(0, ceil(1.1 * res['max_val'].dropna().max()))
        plt.legend()
        plt.title(metric)
        plt.subplot(4, 1, 2)
        plt.plot('calc_date', 'avg_val', data=res, marker='', color='green',
                 linewidth=2, label='avg')
        if args.hideax:
            plt.gca().get_xaxis().set_visible(False)  # Hiding x axis labels on the count
        plt.ylim(0, ceil(1.1 * res['avg_val'].dropna().max()))
        plt.legend()
        plt.subplot(4, 1, 3)
        plt.plot('calc_date', 'min_val', data=res, marker='', color='blue',
                 linewidth=2, label='min')
        if args.hideax:
            plt.gca().get_xaxis().set_visible(False)  # Hiding x axis labels on the count
        # plt.ylim(0, ceil(2*res['min_val'].dropna().max()))
        plt.legend()
        plt.subplot(4, 1, 4)
        plt.plot('calc_date', 'n_calc', data=res, marker='', color='black',
                 linewidth=2, label="n_calc")
        plt.ylim(0, ceil(1.1 * res['n_calc'].dropna().max()))
        plt.legend()
        plt.gca().figure.autofmt_xdate()
        if args.hideax:
            plt.gca().get_yaxis().set_visible(False)  # Hiding y axis labels on the count
            monthFormat = mdates.DateFormatter('%b')
            plt.gca().get_xaxis().set_major_formatter(monthFormat)
        else:
            plt.gcf().autofmt_xdate()
        plt.savefig(save_path + 'metric_valqa_' + cleanedName + '.' + args.format)
        plt.close()

    def qa_metrics(self, args):
        '''
        Loops over the configured metrics and makes the QA plot of each. If a list was provided,
        it only runs the ones in the list.
        :param run_mets: list of strings, metric names; or else None meaning calculate all configured metrics
        :param args: from argparse
        :return:
        '''
        if args.metrics is None:
            for metric in self.metric_dict.keys():
                if metric in self.non_metrics:
                    continue
                self.metric_qa_plot(metric, args)
        else:
            for metric in args.metrics:
                self.metric_qa_plot(metric, args)

    def run_one_metric_calculation(self, metric):
        '''
        Calculate one metric, by name. First adds the id, then loads the raw sql from the file. To set the bind
        variables it starts out with the second level dictionary for this metric from the main metric dictionary.
        Then it adds all of the metric parameters that are common to all metric calculations, such as from and to
        dates, the metric name id, a schema and the value name. These are put into the SQL template with a simple
        replace (did not use the Postgres bind system because it was not flexible enough.) Finally, it runs the SQL.
        :param metric: string name of the metric
        :return:
        '''
        assert metric in self.metric_dict, "No metric %s in metric dictionary!" % metric
        id = self.add_metric_id(metric)
        with open('../sql/%s.sql' % self.metric_dict[metric]['sql'], 'r') as myfile:
            sql = myfile.read().replace('\n', ' ')
        params = self.metric_dict[metric]
        params['metric_name_val'] = metric
        params['schema'] = schema
        params['from_date'] = self.from_date
        params['to_date'] = self.to_date
        params['metric_name_id'] = id
        bind_char = '%'
        for p in params.keys():
            sql = sql.replace(bind_char + p, str(params[p]))
        print(sql)
        self.db.run(sql)

    def calculate_metrics(self, run_mets=None):
        '''
        Loops over the configured metrics and runs them. If a list was provided, it only runs the ones in the list.
        :param run_mets: list of strings, metric names; or else None meaning calculate all configured metrics
        :return:
        '''
        for metric in self.metric_dict.keys():
            if (run_mets is not None and metric not in run_mets) or metric in self.non_metrics:
                continue
            self.run_one_metric_calculation(metric)


"""A `simple cursor`_ that returns tuples.
"""


class SimpleNamedTupleCursor(SimpleCursorBase, NamedTupleCursor):
    """A `simple cursor`_ that returns namedtuples.
    """


class SimpleDictCursor(SimpleCursorBase, RealDictCursor):
    """A `simple cursor`_ that returns dicts.
    """


def isexception(obj):
    """Given an object, return a boolean indicating whether it is an instance
    or subclass of :py:class:`Exception`.
    """
    if isinstance(obj, Exception):
        return True
    if isclass(obj) and issubclass(obj, Exception):
        return True
    return False


if __name__ == '__main__':
    from postgres import Postgres
    db = Postgres("postgres://jrandom@localhost/test")
    db.run("DROP SCHEMA IF EXISTS public CASCADE")
    db.run("CREATE SCHEMA public")
    import doctest
    doctest.testmod()

The SQL statements to create the tables are in the adjacent directory ../schema
'''

from postgres import Postgres
import os
import sys

schema_name = 'socialnet7'
if len(sys.argv) >= 2:
    schema_name = sys.argv[1]

db = Postgres("postgres://%s:%s@localhost/%s" %
              (os.environ['CHURN_DB_USER'], os.environ['CHURN_DB_PASS'],
               os.environ['CHURN_DB']))

tables = [
    'event', 'subscription', 'event_type', 'metric', 'metric_name',
    'active_period', 'observation', 'active_week', 'account'
]

print('Creating schema %s (if not exists)...' % schema_name)
db.run('CREATE SCHEMA IF NOT EXISTS %s;' % schema_name)

for t in tables:
    with open('../schema/create_%s.sql' % t, 'r') as sqlfile:
        sql = sqlfile.read().replace('\n', ' ')
        sql = sql.replace('x.', '%s.' % schema_name)
        print('Creating table %s (if not exists)' % t)
        db.run(sql)

class ChurnSimulation:

    def __init__(self, model, start, end, init_customers, seed):
        '''
        Creates the behavior/utility model objects, sets internal variables to prepare for simulation,
        and creates the database connection

        :param model: name of the behavior/utility model parameters
        :param start: start date for simulation
        :param end: end date for simulation
        :param init_customers: how many customers to create at start date
        '''
        self.model_name = model
        self.start_date = start
        self.end_date = end
        self.init_customers = init_customers
        self.monthly_growth_rate = 0.1

        self.util_mod = UtilityModel(self.model_name)
        behavior_versions = glob.glob('../conf/' + self.model_name + '_*.csv')
        self.behavior_models = {}
        self.model_list = []
        for b in behavior_versions:
            version = b[(b.find(self.model_name) + len(self.model_name) + 1):-4]
            if version in ('utility', 'population', 'country'):
                continue
            behave_mod = FatTailledBehaviorModel(self.model_name, seed, version)
            self.behavior_models[behave_mod.version] = behave_mod
            self.model_list.append(behave_mod)

        if len(self.behavior_models) > 1:
            self.population_percents = pd.read_csv(
                '../conf/' + self.model_name + '_population.csv', index_col=0)
            self.util_mod.setChurnScale(self.behavior_models, self.population_percents)
            self.population_picker = np.cumsum(self.population_percents)

        self.country_lookup = pd.read_csv('../conf/' + self.model_name + '_country.csv')

        self.subscription_count = 0
        self.tmp_sub_file_name = os.path.join(
            tempfile.gettempdir(), '{}_tmp_sub.csv'.format(self.model_name))
        self.tmp_event_file_name = os.path.join(
            tempfile.gettempdir(), '{}_tmp_event.csv'.format(self.model_name))

        self.db = Postgres("postgres://%s:%s@localhost/%s" %
                           (os.environ['CHURN_DB_USER'], os.environ['CHURN_DB_PASS'],
                            os.environ['CHURN_DB']))
        self.con = post.connect(database=os.environ['CHURN_DB'],
                                user=os.environ['CHURN_DB_USER'],
                                password=os.environ['CHURN_DB_PASS'])

    def remove_tmp_files(self):
        '''
        Remove temp files when the simulation is over
        :return:
        '''
        os.remove(self.tmp_event_file_name)
        os.remove(self.tmp_sub_file_name)

    def pick_customer_model(self):
        choice = random.uniform(0, 1)
        for m in range(0, self.population_picker.shape[0]):
            if choice <= self.population_picker['percent'][m]:
                version_name = self.population_picker.index.values[m]
                return self.behavior_models[version_name]

    def simulate_customer(self, start_of_month):
        '''
        Simulate one customer collecting its events and subscriptions.

        This function has the core interaction between the simulation objects. The customer is created from
        the behavior model, picking a random start date within the month. Then the customer object simulates
        the events for the month, and the utility model determines if there is a churn based on the simulated
        event counts.
        :param start_of_month:
        :return: the new customer object; it contains the events and subscriptions
        '''
        # customer_model = self.pick_customer_model()
        customer_model = np.random.choice(self.model_list,
                                          p=self.population_percents['pcnt'])
        new_customer = customer_model.generate_customer(start_of_month)

        customer_country = np.random.choice(self.country_lookup['country'],
                                            p=self.country_lookup['pcnt'])
        new_customer.country = customer_country

        # Pick a random start date for the subscription within the month
        end_range = start_of_month + relativedelta(months=+1)
        this_month = start_of_month + timedelta(
            days=random.randrange((end_range - start_of_month).days))

        churned = False
        while not churned:
            next_month = this_month + relativedelta(months=1)
            new_customer.subscriptions.append((this_month, next_month))
            month_count = new_customer.generate_events(this_month, next_month)
            churned = self.util_mod.simulate_churn(
                month_count, new_customer) or next_month > self.end_date
            if not churned:
                this_month = next_month

        return new_customer

    def create_customers_for_month(self, month_date, n_to_create):
        '''
        Creates all the customers for one month, by calling simulate_customer and
        copy_customer_to_database in a loop.
        :param month_date: the month start date
        :param n_to_create: number of customers to create within that month
        :return:
        '''
        total_subscriptions = 0
        total_events = 0
        for i in range(n_to_create):
            customer = self.simulate_customer(month_date)
            self.copy_customer_to_database(customer)
            total_subscriptions += len(customer.subscriptions)
            total_events += len(customer.events)
            if i % 100 == 0:
                print('Simulated customer {}/{}: {:,} subscriptions & {:,} events'.format(
                    i, n_to_create, total_subscriptions, total_events))

    def copy_customer_to_database(self, customer):
        '''
        Copy one customer's data to the database, by first writing it to temp files and then
        using the sql COPY command
        :param customer: a Customer object that has already had its simulation run
        :return:
        '''
        with open(self.tmp_sub_file_name, 'w') as tmp_file:
            for s in customer.subscriptions:
                tmp_file.write("%d,%d,'%s','%s','%s',%f,\\null,\\null,1\n" %
                               (self.subscription_count, customer.id, self.model_name,
                                s[0], s[1], 9.99))  # mrr is 9.99
                self.subscription_count += 1
        with open(self.tmp_event_file_name, 'w') as tmp_file:
            for e in customer.events:
                tmp_file.write("%d,'%s',%d\n" % (customer.id, e[0], e[1]))

        sql = "INSERT INTO {}.account VALUES({},'{}','{}',{})".format(
            self.model_name, customer.id, customer.channel,
            customer.date_of_birth.isoformat(),
            'NULL' if customer.country == 'None' else "'{}'".format(customer.country))
        self.db.run(sql)

        cur = self.con.cursor()
        sql = "COPY %s.subscription FROM STDIN USING DELIMITERS ',' WITH NULL AS '\\null'" % (
            self.model_name)
        with open(self.tmp_sub_file_name, 'r') as f:
            cur.copy_expert(sql, f)
        self.con.commit()
        sql = "COPY %s.event FROM STDIN USING DELIMITERS ',' WITH NULL AS '\\null'" % (
            self.model_name)
        with open(self.tmp_event_file_name, 'r') as f:
            cur.copy_expert(sql, f)
        self.con.commit()

    def truncate_old_sim(self):
        '''
        Removes an old simulation from the database, if it already exists for this model
        :return: True if is safe to proceed (no data or data removed); False means old data not removed
        '''
        oldEvent = self.db.one('select count(*) from %s.event' % self.model_name)
        oldSubs = self.db.one('select count(*) from %s.subscription' % self.model_name)
        oldAccount = self.db.one('select count(*) from %s.account' % self.model_name)
        if oldEvent > 0 or oldSubs > 0 or oldAccount > 0:
            print('TRUNCATING *Events/Metrics & Subscriptions/Observations* in schema -> %s <- ...'
                  % self.model_name)
            if input("are you sure? (enter %s to proceed) " % self.model_name) == self.model_name:
                if oldEvent > 0:
                    self.db.run('truncate table %s.event' % self.model_name)
                    self.db.run('truncate table %s.metric' % self.model_name)
                if oldAccount > 0:
                    self.db.run('truncate table %s.account' % self.model_name)
                if oldSubs > 0:
                    self.db.run('truncate table %s.subscription' % self.model_name)
                    self.db.run('truncate table %s.active_period' % self.model_name)
                    self.db.run('truncate table %s.observation' % self.model_name)
                return True
            else:
                return False
        else:
            return True

    def run_simulation(self):
        '''
        Simulation main function. First it prepares the database by truncating any old events and
        subscriptions, and inserting the event types into the database. Next it creates the initial
        customers by calling create_customers_for_month, and then it advances month by month adding
        new customers (also using create_customers_for_month.) The number of new customers for each
        month is determined from the growth rate.

        Note that churn is not handled at this level, but is modeled at the customer level.
        :return:
        '''
        # database setup
        if not self.truncate_old_sim():
            return
        # Any model can insert the event types
        self.behavior_models[next(iter(self.behavior_models))].insert_event_types(
            self.model_name, self.db)

        # Initial customer count
        print('\nCreating %d initial customers for month of %s' %
              (self.init_customers, self.start_date))
        self.create_customers_for_month(self.start_date, self.init_customers)
        print('Created %d initial customers with %d subscriptions for start date %s' %
              (self.init_customers, self.subscription_count, str(self.start_date)))

        # Advance to additional months
        next_month = self.start_date + relativedelta(months=+1)
        n_to_add = int(ceil(self.init_customers * self.monthly_growth_rate))  # number of new customers in first month
        while next_month < self.end_date:
            print('\nCreating %d new customers for month of %s:' % (n_to_add, next_month))
            self.create_customers_for_month(next_month, n_to_add)
            print('Created %d new customers for month %s, now %d subscriptions\n' %
                  (n_to_add, str(next_month), self.subscription_count))
            next_month = next_month + relativedelta(months=+1)
            n_to_add = int(ceil(n_to_add * (1.0 + self.monthly_growth_rate)))  # increase the new customers by growth

        self.remove_tmp_files()

from collections import OrderedDict
import os

from postgres import Postgres

POSTGRES_URL = os.environ.get('DATABASE_URL')
db = Postgres(POSTGRES_URL)

columns = OrderedDict()
columns['QUERY_ADDRESS'] = 'varchar(255)'
columns['RETURNED_ADDRESS'] = 'varchar(255)'
columns['ES_SCORE'] = 'float8'
columns['ES_LAT'] = 'float8'
columns['ES_LONG'] = 'float8'
columns['GOOG_LAT'] = 'float8'
columns['GOOG_LONG'] = 'float8'
columns['DISTANCE'] = 'float8'
columns['ES_GEOCODED_AT'] = 'timestamptz'
columns['GOOG_GEOCODED_AT'] = 'timestamptz'

columns_sql = ['%s %s' % (key, value) for key, value in columns.items()]
create_table_sql = 'CREATE TABLE geocoder (%s)' % ', '.join(columns_sql)
db.run(create_table_sql)

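# A minimal follow-on sketch (assumption, not part of the original script): inserting one
# row into the geocoder table created above, using postgres.py's %(name)s bind style with
# a parameters dict. The sample values are made up for illustration.
db.run(
    "INSERT INTO geocoder (QUERY_ADDRESS, RETURNED_ADDRESS, ES_SCORE) "
    "VALUES (%(query_address)s, %(returned_address)s, %(es_score)s)",
    {
        "query_address": "123 Example St",
        "returned_address": "123 Example Street",
        "es_score": 0.97,
    },
)
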
    Although Row objects support item lookups and assignments, they are not
    instances of the :class:`dict` class and they don't have its methods
    (:meth:`~dict.get`, :meth:`~dict.items`, etc.).
    """

    back_as = 'Row'


def isexception(obj):
    """Given an object, return a boolean indicating whether it is an instance
    or subclass of :class:`Exception`.
    """
    if isinstance(obj, Exception):
        return True
    if isclass(obj) and issubclass(obj, Exception):
        return True
    return False


if __name__ == '__main__':  # pragma: no cover
    from postgres import Postgres
    db = Postgres()
    db.run("DROP SCHEMA IF EXISTS public CASCADE")
    db.run("CREATE SCHEMA public")
    db.run("CREATE TABLE foo (bar text, baz int)")
    db.run("INSERT INTO foo VALUES ('buz', 42)")
    db.run("INSERT INTO foo VALUES ('bit', 537)")
    import doctest
    doctest.testmod()

        Call this when you update state in the database and you want to keep
        instance attributes in sync. Note that the only attributes we can set
        here are the ones that were given to us by the :py:mod:`psycopg2`
        composite caster machinery when we were first instantiated. These will
        be the fields of the composite type for which we were registered,
        which will be column names for table and view types.

        """
        unknown = []
        for name in kw:
            if name not in self.__read_only_attributes:
                unknown.append(name)
        if unknown:
            raise UnknownAttributes(unknown)
        self.__dict__.update(**kw)


if __name__ == '__main__':
    from postgres import Postgres
    db = Postgres("postgres://jrandom@localhost/test")
    db.run("DROP SCHEMA IF EXISTS public CASCADE")
    db.run("CREATE SCHEMA public")
    db.run("DROP TABLE IF EXISTS foo CASCADE")
    db.run("CREATE TABLE foo (bar text, baz int)")
    db.run("INSERT INTO foo VALUES ('blam', 42)")
    db.run("INSERT INTO foo VALUES ('whit', 537)")
    db.run("CREATE VIEW bar AS SELECT bar FROM foo")
    import doctest
    doctest.testmod()

from collections import OrderedDict
import os
import csv
import re

from postgres import Postgres

db = Postgres(os.environ.get('DATABASE_URL'))

csv_reader = csv.DictReader(open('./data/ParcelCentroids.csv'))

for index, row in enumerate(csv_reader):
    if re.search(r'^\d+$', row['PVANUM']):
        db.run("INSERT INTO parcels VALUES (%s, %s, %s, %s)",
               (row['X'], row['Y'], row['ADDRESS'], row['PVANUM']))
    else:
        print(row['PVANUM'])

class SimpleNamedTupleCursor(NamedTupleCursor, SimpleCursorBase):
    """A `simple cursor`_ that returns namedtuples.
    """


class SimpleDictCursor(RealDictCursor, SimpleCursorBase):
    """A `simple cursor`_ that returns dicts.
    """


def isexception(obj):
    """Given an object, return a boolean indicating whether it is an instance
    or subclass of :py:class:`Exception`.
    """
    if isinstance(obj, Exception):
        return True
    if isclass(obj) and issubclass(obj, Exception):
        return True
    return False


if __name__ == '__main__':
    from postgres import Postgres
    db = Postgres("postgres://jrandom@localhost/test")
    db.run("DROP SCHEMA IF EXISTS public CASCADE")
    db.run("CREATE SCHEMA public")
    import doctest
    doctest.testmod()


        """
        unknown = None
        attnames = self.__class__.attnames
        for name in kw:
            if name not in attnames:
                if unknown is None:
                    unknown = [name]
                else:
                    unknown.append(name)
        if unknown:
            raise UnknownAttributes(unknown)
        _setattr = super(Model, self).__setattr__
        for name, value in kw.items():
            _setattr(name, value)


if __name__ == '__main__':  # pragma: no cover
    from postgres import Postgres
    db = Postgres()
    db.run("DROP SCHEMA IF EXISTS public CASCADE")
    db.run("CREATE SCHEMA public")
    db.run("DROP TABLE IF EXISTS foo CASCADE")
    db.run("CREATE TABLE foo (bar text, baz int)")
    db.run("INSERT INTO foo VALUES ('blam', 42)")
    db.run("INSERT INTO foo VALUES ('whit', 537)")
    db.run("CREATE VIEW bar AS SELECT bar FROM foo")
    import doctest
    doctest.testmod()

#!/usr/bin/python
from http.server import BaseHTTPRequestHandler, HTTPServer
from os import curdir, read, sep

from postgres import Postgres
import cgi

PORT_NUMBER = 8080

db = Postgres("postgres://harrymoreno_backend@localhost/blog")
db.run("CREATE TABLE foo (bar text, baz int)")


# This class will handle any incoming request from the browser
class myHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        if self.path == "/":
            self.path = "/index_example3.html"
        try:
            # Check the file extension required and set the right mime type
            sendReply = False
            if self.path.endswith(".html"):
                mimetype = 'text/html'
                sendReply = True
            if self.path.endswith(".jpg"):
                mimetype = 'image/jpg'
                sendReply = True
            if self.path.endswith(".gif"):
                mimetype = 'image/gif'
                sendReply = True
            if self.path.endswith(".js"):
                mimetype = 'application/javascript'
                sendReply = True

def sql_listing(chapter, listing, name, schema, mode, param_dict,
                insert=False, save_ext=None):
    """
    Run a SQL listing. The sql file is loaded, and then any non-reserved keyword in the parameters is treated as a
    string to be replaced in the sql string. The SQL is then printed out.

    :param chapter:
    :param listing:
    :param name:
    :param schema:
    :param mode: The allowed modes are:
        run : The SQL returns no result
        one : The SQL should return one row result to be printed
        top : The SQL returns many results, print the top N (given by global print_num_rows)
    :param param_dict: the bind variables as a dictionary
    :param insert: flag to use the insert form of a query; see chapter 7
    :param save_ext:
    :return:
    """
    with open('../../listings/chap%d/%s.sql' %
              (chapter, _full_listing_name(chapter, listing, name, insert)), 'r') as myfile:
        db = Postgres("postgres://%s:%s@localhost/%s" %
                      (os.environ['CHURN_DB_USER'], os.environ['CHURN_DB_PASS'],
                       os.environ['CHURN_DB']))
        # prefix the search path onto the listing, which does not specify the schema
        sql = "set search_path = '%s'; " % schema
        # load the sql file
        sql = sql + myfile.read()
        # bind any parameters that match strings in the sql
        param_keys = [
            p for p in param_dict.keys() if p not in reserved_param_keywords
        ]
        for p in param_keys:
            sql = sql.replace(p, str(param_dict[p]))
        # Print the sql (then remove the newlines)
        print('SQL:\n----------\n' + sql + '\n----------\nRESULT:')
        sql = sql.replace('\n', ' ')
        # Run in the manner indicated by the mode
        if mode == 'run':
            db.run(sql)
        elif mode == 'one':
            res = db.one(sql)
            print(res)
        elif mode == 'top' or mode == 'save':
            res = db.all(sql)
            df = pd.DataFrame(res)
            if mode == 'save':
                save_path = '../../../fight-churn-output/' + schema + '/'
                os.makedirs(save_path, exist_ok=True)
                csv_path = save_path + schema + '_' + name.replace(
                    'listing_{}_{}_'.format(chapter, listing), '')
                if save_ext:
                    csv_path = csv_path + '_' + save_ext
                csv_path = csv_path + '.csv'
                print('Saving: %s' % csv_path)
                df.to_csv(csv_path, index=False)
            else:
                print(df.head(print_num_rows))
        else:
            print('Unknown run mode for SQL example')
            exit(-4)

if len(argv) == 1:
    stdout.write(bbcode_to_markdown(stdin.read().decode('utf-8', 'replace')).encode('utf-8'))
else:
    from postgres import Postgres

    _, db_uri, db_table, id_column, db_column = argv
    info = {"table": db_table, "column": db_column, "id": id_column}
    db = Postgres(db_uri)
    count = db.one('SELECT COUNT(*) FROM "{table}"'.format(**info))
    query = 'UPDATE "{table}" SET "{column}" = %(markdown)s WHERE "{id}" = %(id)s'.format(**info)
    failures = []
    for i, row in enumerate(db.all('SELECT "{id}", "{column}" FROM "{table}"'.format(**info))):
        print("\x1b[0G{done}/{total}".format(done=i, total=count), end="")
        stdout.flush()
        try:
            db.run(query, {"id": getattr(row, id_column),
                           "markdown": bbcode_to_markdown(getattr(row, db_column))})
        except Exception as e:
            print()
            print(e)
            failures.append(getattr(row, id_column))
    print()
    if failures:
        print("Failed:")
        print(" ".join(map(str, failures)))

from collections import OrderedDict
import os

from postgres import Postgres

db = Postgres(os.environ.get('DATABASE_URL'))

columns = OrderedDict()
columns['x'] = 'float8'
columns['y'] = 'float8'
columns['address'] = 'varchar(255)'
columns['parcel_id'] = 'int'

columns_sql = ['%s %s' % (key, value) for key, value in columns.items()]
create_table_sql = 'CREATE TABLE parcels(%s)' % ', '.join(columns_sql)

db.run('DROP TABLE IF EXISTS parcels')
db.run(create_table_sql)

import urlparse
import os

from postgres import Postgres
import psycopg2

distances_file_name = "./data/distances.csv"
beer_names_file_name = "./data/metadata.csv"
username_file_name = "./data/usernames.csv"
reviews_file_name = "./data/reviews.csv"

db_location = os.environ.get("DATABASE_URL", "postgres://[email protected]:5432/postgres")
db = Postgres(db_location)

try:
    db.run("DROP TABLE beer_names")
except psycopg2.ProgrammingError:
    pass
finally:
    db.run("CREATE TABLE beer_names(beer_id int PRIMARY KEY NOT NULL, beer_name varchar, beer_image_url varchar)")

with open(beer_names_file_name, "r") as infile:
    for line in infile:
        comma_seperated_values = line.strip().split(',')
        values = {
            "beer_id": comma_seperated_values[0],
            "beer_name": comma_seperated_values[1],
            "beer_image_url": comma_seperated_values[2]
        }
        db.run("INSERT INTO beer_names VALUES(%(beer_id)s,%(beer_name)s,%(beer_image_url)s)", values)

try: