def run(churn_date):
    load_env()

    if os.getenv("CRM_DB_HOST") is None:
        print('CRM database connection settings not set in .env file, '
              'skipping churn/renewal data aggregation')
        exit()

    year = int(churn_date[0:4])
    month = int(churn_date[4:6])
    day = int(churn_date[6:8])
    churn_date = date(year, month, day)

    crm_db_conn, crm_db_cur = create_mysql_connection(
        os.getenv("CRM_DB_USER"), os.getenv("CRM_DB_PASS"),
        os.getenv("CRM_DB_DB"), os.getenv("CRM_DB_HOST"))

    # The churn threshold is how long a user has to renew a subscription
    # before being counted as churned (2 days by default).
    churn_days_threshold = 2
    subscriptions_stop_date = churn_date - timedelta(days=churn_days_threshold)

    churn_events_list, renewal_events_list = churn_or_renewed_users(
        crm_db_cur, subscriptions_stop_date, churn_days_threshold)

    crm_db_cur.close()
    crm_db_conn.close()

    postgre_conn, postgre_cur = create_con(
        os.getenv("POSTGRES_USER"), os.getenv("POSTGRES_PASS"),
        os.getenv("POSTGRES_DB"), os.getenv("POSTGRES_HOST"))

    # Delete events for the particular day (so the command can be safely
    # run multiple times)
    event_types = ['churn', 'renewal']
    postgre_cur.execute(
        '''
        DELETE FROM events
        WHERE computed_for = %s AND type = ANY(%s)
        ''', (churn_date, event_types))
    postgre_conn.commit()

    print("Saving churn events, count=" + str(len(churn_events_list)))
    if churn_events_list:
        save_events_to_aggregated_user_days(
            postgre_cur, subscriptions_stop_date, churn_events_list, "churn")
        save_events(postgre_cur, churn_date, churn_events_list, "churn")

    print("Saving renewal events, count=" + str(len(renewal_events_list)))
    if renewal_events_list:
        save_events_to_aggregated_user_days(
            postgre_cur, subscriptions_stop_date, renewal_events_list, "renewal")
        save_events(postgre_cur, churn_date, renewal_events_list, "renewal")

    postgre_cur.close()
    postgre_conn.close()
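# The helper below is not part of the source; it is a minimal sketch of what
# save_events could look like, given that the DELETE in run() confirms the
# events table has `computed_for` and `type` columns and that re-runs must
# stay idempotent. The `user_id` column name and the assumption that the
# event lists hold user ids are hypothetical.
def save_events(cur, computed_for, user_ids, event_type):
    # Insert one event row per user for the given day and event type.
    for user_id in user_ids:
        cur.execute(
            '''
            INSERT INTO events (user_id, type, computed_for)
            VALUES (%s, %s, %s)
            ''', (user_id, event_type, computed_for))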
def plot_any_env_fig(env_path):
    env, info, _ = utils.load_env(env_path)  # load an environment
    seq = doorkey_problem(env, info)  # find the optimal action sequence
    fig_name = os.path.basename(env_path).split(".")[0]
    img_array = utils.draw_gif_from_seq(
        seq, utils.load_env(env_path)[0], save_name=fig_name)  # draw a GIF & save
    return seq, img_array
def main():
    print('========== Map Information ===========')
    mapname = 'doorkey-6x6-shortcut'
    env_path = './envs/' + mapname + '.env'
    print("Map Name: {n}".format(n=env_path))
    env, info = load_env(env_path)  # load an environment

    # Visualize the environment
    plot_env(env)
    print('<Environment Info>\n')
    print(info)  # map size, agent initial position & direction,
                 # key position, door position, goal position
    print('======================================\n')

    goal_key = np.array(info['key_pos'])
    goal_door = np.array(info['door_pos'])
    goal_final = np.array(info['goal_pos'])

    # FLAG: whether to close the door or not
    #   0 for the key/door path
    #   1 for the direct path

    # The path and cost using the key and door
    FLAG = 0
    seq_kd, cost_kd, policy_kd, v_t_kd = key_door_path(
        env, env_path, goal_key, goal_door, goal_final, FLAG)

    # The path and cost using the direct path
    FLAG = 1
    env, info = load_env(env_path)  # reset the map
    seq_dr, cost_dr, policy_dr, v_t_dr = direct_path(
        env, env_path, goal_final, FLAG)

    if cost_kd < cost_dr:
        draw_gif_from_seq(seq_kd, load_env(env_path)[0],
                          path='./gif/' + mapname + '.gif')
    else:
        draw_gif_from_seq(seq_dr, load_env(env_path)[0],
                          path='./gif/' + mapname + '.gif')
def partB():
    env_folder = './envs/random_envs'
    env, info, env_path = utils.load_env(env_folder, load_random_env=True)
    seq = doorkey_problem(env, info)  # find the optimal action sequence
    fig_name = os.path.basename(env_path).split(".")[0]
    img_array = utils.draw_gif_from_seq(
        seq, env, save_name=fig_name)  # draw a GIF & save
    return seq, img_array
def run(file_date, aggregate_folder):
    load_env()

    commerce_file = os.path.join(aggregate_folder, "commerce",
                                 "commerce_" + file_date + ".csv")
    pageviews_file = os.path.join(aggregate_folder, "pageviews",
                                  "pageviews_" + file_date + ".csv")

    if not os.path.isfile(commerce_file):
        print("Error: file " + commerce_file + " does not exist")
        return
    if not os.path.isfile(pageviews_file):
        print("Error: file " + pageviews_file + " does not exist")
        return

    year = int(file_date[0:4])
    month = int(file_date[4:6])
    day = int(file_date[6:8])
    cur_date = date(year, month, day)

    conn, cur = create_con(os.getenv("POSTGRES_USER"), os.getenv("POSTGRES_PASS"),
                           os.getenv("POSTGRES_DB"), os.getenv("POSTGRES_HOST"))
    migrate(cur)
    conn.commit()

    # Delete events for the particular day (so the command can be safely
    # run multiple times)
    event_types = ['conversion', 'shared_account_login']
    cur.execute(
        '''
        DELETE FROM events
        WHERE computed_for = %s AND type = ANY(%s)
        ''', (cur_date, event_types))
    conn.commit()

    commerce_parser = CommerceParser(cur_date, cur)
    commerce_parser.process_file(commerce_file)

    pageviews_parser = SharedLoginParser(cur_date, cur)
    pageviews_parser.process_file(pageviews_file)

    cur.close()
    conn.close()
def run(file_date, aggregate_folder):
    load_env()

    pageviews_file = os.path.join(aggregate_folder, "pageviews",
                                  "pageviews_" + file_date + ".csv")
    pageviews_time_spent_file = os.path.join(
        aggregate_folder, "pageviews_time_spent",
        "pageviews_time_spent_" + file_date + ".csv")

    if not os.path.isfile(pageviews_file):
        print("Error: file " + pageviews_file + " does not exist")
        return

    conn, cur = create_con(os.getenv("POSTGRES_USER"), os.getenv("POSTGRES_PASS"),
                           os.getenv("POSTGRES_DB"), os.getenv("POSTGRES_HOST"))
    migrate(cur)
    conn.commit()

    m = pattern.search(pageviews_file)
    date_str = m.group(2)
    year = int(date_str[0:4])
    month = int(date_str[4:6])
    day = int(date_str[6:8])

    print("Updating 'aggregated_browser_days' and related tables")
    browser_parser = BrowserParser()
    browser_parser.process_files(pageviews_file, pageviews_time_spent_file)
    browser_parser.store_in_db(conn, cur, date(year, month, day))

    print("Updating 'aggregated_user_days' and related tables")
    user_parser = UserParser()
    user_parser.process_files(pageviews_file, pageviews_time_spent_file)
    user_parser.store_in_db(conn, cur, date(year, month, day))

    conn.commit()
    cur.close()
    conn.close()
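# run() above references a module-level `pattern` that is not shown in this
# snippet. A plausible reconstruction, assuming aggregate filenames of the
# form pageviews_YYYYMMDD.csv and that group 2 captures the eight date digits
# consumed by m.group(2); the exact group layout is an assumption.
import re

pattern = re.compile(r'(pageviews)_(\d{8})\.csv$')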
import os

from utils import load_env

DEBUG = os.getenv('FLASK_DEBUG') == '1'
SECRET_KEY = load_env('FLASK_SECRET_KEY')

BLUEPRINTS = ['auth']
EXTENSIONS = list(
    map(lambda e: 'extensions.' + e,
        ['io', 'db', 'migrate', 'glue', 'ma', 'security']))

# Make sure SERVER_NAME contains the access port for the HTTP server if it
# is not a default port (ex: dv:8080).
# Also, add "127.0.0.1 dv" to your /etc/hosts during development.
SERVER_NAME = os.getenv('SERVER_NAME') + os.getenv('SERVER_NAME_EXTRA', '')

PSYCOPG2_URI = 'postgresql+psycopg2://{user}:{passwd}@{host}/{name}'
SQLALCHEMY_DATABASE_URI = PSYCOPG2_URI.format(
    user=load_env('POSTGRES_USER'),
    passwd=load_env('POSTGRES_PASSWORD'),
    host='db',
    name=load_env('POSTGRES_DB'))
SQLALCHEMY_TRACK_MODIFICATIONS = False
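# The config above assumes a `load_env` helper in utils that, unlike
# os.getenv, fails loudly when a required variable is unset, so a
# misconfiguration surfaces at import time rather than as a None in a
# connection string. A minimal sketch under that assumption; the actual
# helper in utils may differ.
import os

def load_env(key):
    # Read a required environment variable and fail fast if it is missing.
    value = os.getenv(key)
    if value is None:
        raise RuntimeError("required environment variable %r is not set" % key)
    return value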
def main():
    """Analysis of pedestrian and cyclist fatalities from 1982 to 2018."""
    # load and check environment has been set
    utils.load_env()
    root, project = (
        os.getenv("DATA_ROOT"),
        os.getenv("PROJECT_KEY"),
    )

    # get spark session object
    spark = utils.create_spark_session()

    # read in accident data
    full_path = utils.get_dir(
        root, project, "interim",
        "FARS").joinpath("all_fatal_accidents_1982_to_2018.csv")
    accidents = utils.read_csv(full_path)

    # convert column to integer
    accidents = accidents.withColumn("FATALS",
                                     accidents["FATALS"].cast(T.IntegerType()))

    # prepare for analysis
    accidents.createOrReplaceTempView("accidents")

    # total number of accidents
    print(f"\nFatal Accidents 1982 to 2018: {accidents.count():,}")

    # read in geographic location codes as json
    glc_path = utils.get_dir(
        root, project, "external",
        "FRPP_GLC").joinpath("FRPP_GLC_United_States.json")
    location = spark.read.json(
        str(glc_path),
        mode="FAILFAST",
        multiLine=True,
        allowNumericLeadingZero=True,
    )
    location.show(5)
    location.createOrReplaceTempView("location")

    # join the GLC and FARS dataframes and limit scope to Denver/Seattle
    den_sea_fatalities = spark.sql("""
        SELECT a.YEAR as Year, l.City_Name, sum(a.FATALS) as All_Fatalities
        FROM accidents a
        JOIN location l ON (a.STATE = l.State_Code
                            AND a.COUNTY = l.County_Code
                            AND a.CITY = l.City_Code)
        WHERE (l.State_Code = '08' AND l.City_Code = '0600')
           OR (l.State_Code = '53' AND l.City_Code = '1960')
        GROUP BY a.YEAR, l.City_Name
        ORDER BY a.YEAR
    """)
    den_sea_fatalities.show(5)

    # save the results
    den_sea_fatalities_path = (
        utils.get_dir(root, project, "processed", "FARS") /
        "den_sea_fatalities.csv")
    utils.write_csv(den_sea_fatalities, den_sea_fatalities_path)

    # now just pedestrian and bicycle accidents
    den_sea_ped_bike_fatalities = spark.sql("""
        SELECT a.YEAR as Year, l.City_Name, sum(a.FATALS) as Ped_Bike_Fatalities
        FROM accidents a
        JOIN location l ON (a.STATE = l.State_Code
                            AND a.COUNTY = l.County_Code
                            AND a.CITY = l.City_Code)
        WHERE ((l.State_Code = '08' AND l.City_Code = '0600')
               OR (l.State_Code = '53' AND l.City_Code = '1960'))
          AND (a.A_PED = 1 OR a.A_PEDAL = 1)
        GROUP BY a.YEAR, l.City_Name
        ORDER BY a.YEAR
    """)
    den_sea_ped_bike_fatalities.show(5)

    # save the results
    den_sea_ped_bike_fatalities_path = utils.get_dir(
        root, project, "processed",
        "FARS").joinpath("den_sea_ped_bike_fatalities.csv")
    utils.write_csv(den_sea_ped_bike_fatalities,
                    den_sea_ped_bike_fatalities_path)
from flask import Flask, request, jsonify
from markupsafe import escape
from database import connect_db, Database
from utils import response, load_env, hash_password
from model import CreateUserRequestBodyModel, LoginUserRequestBodyModel
from flask_pydantic import validate
from datetime import datetime, timedelta
import jwt
from functools import wraps

config = load_env()
db = Database(config['MONGO_URL'], 'example')

app = Flask(__name__)
app.config['JSON_SORT_KEYS'] = False


def check_auth(f):
    @wraps(f)
    def authenticate(*args, **kws):
        token = None
        # the JWT is passed in the request header
        if 'x-auth-token' in request.headers:
            token = request.headers['x-auth-token']
        # return 401 if token is not passed
        if not token:
            return response(401, {'message': 'Unauthorized access'})
        try:
            # decoding the payload to fetch the stored details; the original
            # snippet breaks off here, so the lines below are a completion
            # sketch: the 'JWT_SECRET' config key and the HS256 algorithm
            # are assumptions, not from the source
            payload = jwt.decode(token, config['JWT_SECRET'],
                                 algorithms=['HS256'])
        except jwt.InvalidTokenError:
            return response(401, {'message': 'Invalid or expired token'})
        return f(payload, *args, **kws)

    return authenticate
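# A usage sketch for the decorator above; the route, the `payload` argument
# forwarded by check_auth, and the 'user_id' claim name are illustrative
# assumptions, not taken from the source.
@app.route('/api/me', methods=['GET'])
@check_auth
def me(payload):
    # payload holds the decoded JWT claims passed through by check_auth
    return jsonify({'user_id': payload.get('user_id')})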