class PostgresOperator(BaseOperator):
    """
    Executes sql code in a specific Postgres database

    :param postgres_conn_id: reference to a specific postgres database
    :type postgres_conn_id: string
    :param sql: the sql code to be executed
    :type sql: string or string pointing to a template file. File must have
        a '.sql' extension.
    :param autocommit: passed to ``PostgresHook.run`` as its second
        positional argument
    :type autocommit: bool
    """

    __mapper_args__ = {'polymorphic_identity': 'PostgresOperator'}
    template_fields = ('sql', )
    template_ext = ('.sql', )
    ui_color = '#ededed'

    @apply_defaults
    def __init__(self, sql, postgres_conn_id='postgres_default',
                 autocommit=False, *args, **kwargs):
        super(PostgresOperator, self).__init__(*args, **kwargs)
        self.sql = sql
        self.postgres_conn_id = postgres_conn_id
        self.autocommit = autocommit

    def execute(self, context):
        """Log the statement, then run it through a new ``PostgresHook``."""
        # Lazy %-formatting: concatenating with ``+`` raised TypeError
        # whenever ``sql`` was not a plain string.
        logging.info('Executing: %s', self.sql)
        self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id)
        self.hook.run(self.sql, self.autocommit)
class PostgresOperator(BaseOperator):
    """
    Executes sql code in a specific Postgres database

    :param postgres_conn_id: reference to a specific postgres database
    :type postgres_conn_id: string
    :param sql: the sql code to be executed
    :type sql: string or string pointing to a template file. File must have
        a '.sql' extension.
    :param autocommit: passed to ``PostgresHook.run`` as its second
        positional argument
    :type autocommit: bool
    """

    template_fields = ('sql',)
    template_ext = ('.sql',)
    ui_color = '#ededed'

    @apply_defaults
    def __init__(
            self, sql,
            postgres_conn_id='postgres_default', autocommit=False,
            *args, **kwargs):
        super(PostgresOperator, self).__init__(*args, **kwargs)
        self.sql = sql
        self.postgres_conn_id = postgres_conn_id
        self.autocommit = autocommit

    def execute(self, context):
        """Log the statement, then run it through a new ``PostgresHook``."""
        # Lazy %-formatting: concatenating with ``+`` raised TypeError
        # whenever ``sql`` was not a plain string.
        logging.info('Executing: %s', self.sql)
        self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id)
        self.hook.run(self.sql, self.autocommit)
def execute(self, context):
    """
    Drop every table in ``self.db_schema`` whose name contains ``self.key``.

    ``self.check_for_dependencies()`` is called first when
    ``self.dag_dependencies`` is truthy.
    """
    if self.dag_dependencies:
        self.check_for_dependencies()

    # The schema name is a value here, so it is bound as a query parameter
    # instead of being formatted into the statement text.
    tables_sql = """
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = %s
    """
    hook = PostgresHook(self.db_conn_id)
    rows = hook.get_records(tables_sql, parameters=(str(self.db_schema),))
    table_names = [row[0] for row in rows]

    for table_name in table_names:
        if self.key not in table_name:
            continue
        # Log the table that is actually dropped, not just the search key.
        logging.info('Dropping: {}.{}'.format(self.db_schema, table_name))
        # Identifiers cannot be bound as parameters, so they stay formatted.
        drop_sql = 'DROP TABLE {0}.{1}'.format(self.db_schema, table_name)
        hook.run(drop_sql)
def create_target():
    """Create the ``dim_sneakers`` table on the ``snql`` connection if absent."""
    ddl = (
        "CREATE TABLE IF NOT EXISTS dim_sneakers( "
        "sneaker_id INT, sneaker_name VARCHAR, color VARCHAR, "
        "created_at TIMESTAMP, is_owned BOOLEAN, "
        "sold_at TIMESTAMP, trashed_at TIMESTAMP, given_at TIMESTAMP, "
        "manufacturer_name VARCHAR, collaborator_name VARCHAR, "
        "count_wears INT, count_cleans INT, count_walks INT, "
        "updated_at TIMESTAMP);"
    )
    # is_current BOOLEAN  (column left out of the DDL above)

    # Run the DDL through a hook bound to the target connection.
    PostgresHook('snql').run(ddl)
def get_news_meta(**kwargs):
    '''
    Retrieve metadata for news articles published on our set of sources
    during the time period between our last crawl and the current
    execution time.

    Reads ``ds`` (execution date; defaults to now) and ``api_key`` from
    kwargs. Each non-empty page of results is inserted into ``news_meta1``.

    Returns the article list of the last page fetched, or an empty list
    when the API returned no pages.
    '''
    exec_date = kwargs.get('ds') or datetime.now()
    if isinstance(exec_date, str):
        # ``ds`` can arrive as a 'YYYY-MM-DD' date string (Airflow's form);
        # it has to be parsed before the timedelta arithmetic below.
        exec_date = datetime.strptime(exec_date, '%Y-%m-%d')
    start_date = exec_date - timedelta(hours=3)

    api = NewsAPI(kwargs.get('api_key'))
    query_args = {
        'sources': SOURCE_LIST,
        'page_size': 100,
        'from': convert_time(start_date),
        'to': convert_time(exec_date),
        'language': 'en',
    }

    pg_hook = PostgresHook(postgres_conn_id='sumzero')
    # Pre-bound so the return below cannot hit an unbound name when the
    # paginator yields nothing.
    meta_list = []
    for resp in api.paginate_articles(**query_args):
        meta_list = resp.articles
        if meta_list:
            rows = (meta_to_row(d) for d in meta_list)
            pg_hook.insert_rows(
                'news_meta1',
                rows=rows,
                target_fields=('title', 'description', 'url', 'published_on'),
                commit_every=1000)
    return meta_list
def create_import_table(**kwargs):
    """Create the ``trips`` table if it does not exist.

    The connection id is read from ``kwargs['conn_id']``.
    """
    trips_ddl = (
        " CREATE TABLE IF NOT EXISTS trips ( "
        "trip_id SERIAL8 PRIMARY KEY, city VARCHAR(16), duration_sec INT4, "
        "start_time TIMESTAMP, end_time TIMESTAMP, "
        "start_station_id FLOAT4, start_station_name VARCHAR(256), "
        "start_location_latitude FLOAT8, start_location_longitude FLOAT8, "
        "end_station_id FLOAT4, end_station_name VARCHAR(256), "
        "end_location_latitude FLOAT8, end_location_longitude FLOAT8, "
        "bike_id INT4, user_type VARCHAR(16), "
        "member_birth_year FLOAT4, member_gender VARCHAR(16) ); "
    )
    hook = PostgresHook(kwargs.get('conn_id'))
    hook.run(trips_ddl)
class PostgresOperator(BaseOperator):
    """
    Runs SQL against a Postgres database through ``PostgresHook``.

    :param postgres_conn_id: reference to a specific postgres database
    :type postgres_conn_id: string
    :param sql: the sql code to be executed
    :type sql: a str holding one sql statement, a list of str (several
        statements), or a reference to a template file. Template
        references are recognized by str ending in '.sql'
    :param autocommit: handed to ``PostgresHook.run`` as its second
        positional argument
    :param parameters: handed to ``PostgresHook.run`` as ``parameters``
    """

    template_fields = ('sql', )
    template_ext = ('.sql', )
    ui_color = '#ededed'

    @apply_defaults
    def __init__(self, sql, postgres_conn_id='postgres_default',
                 autocommit=False, parameters=None, *args, **kwargs):
        super(PostgresOperator, self).__init__(*args, **kwargs)
        self.postgres_conn_id = postgres_conn_id
        self.sql = sql
        self.parameters = parameters
        self.autocommit = autocommit

    def execute(self, context):
        """Log the statement(s) and run them on the configured connection."""
        message = 'Executing: ' + str(self.sql)
        logging.info(message)
        hook = PostgresHook(postgres_conn_id=self.postgres_conn_id)
        # Kept on the instance as well, exactly as before.
        self.hook = hook
        hook.run(self.sql, self.autocommit, parameters=self.parameters)
def get_rates(ds, **kwargs):
    """
    Fetch exchange rates over the ``openexchangerates`` HTTP connection and
    insert one row per accepted currency into the ``rates`` table.

    A rate is skipped when it cannot be converted to float, is negative,
    or its code is not in the supported set below.
    """
    pg_hook = PostgresHook(postgres_conn_id='rates')
    api_hook = HttpHook(http_conn_id='openexchangerates', method='GET')

    # If either of these raises an exception then we'll be notified via
    # Airflow
    resp = api_hook.run('')
    resp = json.loads(resp.content)

    # These are the only valid pairs the DB supports at the moment. Anything
    # else that turns up will be ignored. A frozenset gives O(1) membership
    # tests inside the loop.
    valid_pairs = frozenset((
        'AED', 'AFN', 'ALL', 'AMD', 'ANG', 'AOA', 'ARS', 'AUD', 'AWG', 'AZN',
        'BAM', 'BBD', 'BDT', 'BGN', 'BHD', 'BIF', 'BMD', 'BND', 'BOB', 'BRL',
        'BSD', 'BTC', 'BTN', 'BWP', 'BYN', 'BYR', 'BZD', 'CAD', 'CDF', 'CHF',
        'CLF', 'CLP', 'CNY', 'COP', 'CRC', 'CUC', 'CUP', 'CVE', 'CZK', 'DJF',
        'DKK', 'DOP', 'DZD', 'EEK', 'EGP', 'ERN', 'ETB', 'EUR', 'FJD', 'FKP',
        'GBP', 'GEL', 'GGP', 'GHS', 'GIP', 'GMD', 'GNF', 'GTQ', 'GYD', 'HKD',
        'HNL', 'HRK', 'HTG', 'HUF', 'IDR', 'ILS', 'IMP', 'INR', 'IQD', 'IRR',
        'ISK', 'JEP', 'JMD', 'JOD', 'JPY', 'KES', 'KGS', 'KHR', 'KMF', 'KPW',
        'KRW', 'KWD', 'KYD', 'KZT', 'LAK', 'LBP', 'LKR', 'LRD', 'LSL', 'LTL',
        'LVL', 'LYD', 'MAD', 'MDL', 'MGA', 'MKD', 'MMK', 'MNT', 'MOP', 'MRO',
        'MTL', 'MUR', 'MVR', 'MWK', 'MXN', 'MYR', 'MZN', 'NAD', 'NGN', 'NIO',
        'NOK', 'NPR', 'NZD', 'OMR', 'PAB', 'PEN', 'PGK', 'PHP', 'PKR', 'PLN',
        'PYG', 'QAR', 'RON', 'RSD', 'RUB', 'RWF', 'SAR', 'SBD', 'SCR', 'SDG',
        'SEK', 'SGD', 'SHP', 'SLL', 'SOS', 'SRD', 'STD', 'SVC', 'SYP', 'SZL',
        'THB', 'TJS', 'TMT', 'TND', 'TOP', 'TRY', 'TTD', 'TWD', 'TZS', 'UAH',
        'UGX', 'USD', 'UYU', 'UZS', 'VEF', 'VND', 'VUV', 'WST', 'XAF', 'XAG',
        'XAU', 'XCD', 'XDR', 'XOF', 'XPD', 'XPF', 'XPT', 'YER', 'ZAR', 'ZMK',
        'ZMW', 'ZWL',
    ))

    rates_insert = """INSERT INTO rates (pair, valid_until, rate)
                      VALUES (%s, %s, %s);"""

    # If this raises an exception then we'll be notified via Airflow
    valid_until = datetime.fromtimestamp(resp['timestamp'])

    # ``.items()`` works on Python 2 and 3; ``.iteritems()`` was 2-only.
    for iso2, rate in resp['rates'].items():
        # If converting the rate to a float fails for whatever reason then
        # just move on. Only conversion failures are swallowed, so
        # unrelated errors (e.g. KeyboardInterrupt) still propagate.
        try:
            rate = float(rate)
        except (TypeError, ValueError, OverflowError):
            continue

        iso2 = iso2.upper().strip()

        if iso2 not in valid_pairs or rate < 0:
            continue

        pg_hook.run(rates_insert, parameters=(iso2, valid_until, rate))
def insert_events(**kwargs):
    """
    Fetch events between ``execution_date`` and ``next_execution_date``
    from the events endpoint and insert them into ``events_stg``.

    Each event is passed through ``process_event`` before insertion.
    Raises whatever ``requests`` raises for an HTTP error status.
    """
    # airflow format 2020-10-22T00:00:00+00:00 '%Y-%m-%dT%H:%M:%S%z'
    # json file format 2020-10-22 20:36:00.178590 '%Y-%m-%d %H:%M:%S.%f'
    time_format = '%Y-%m-%d %H:%M:%S.%f'

    execution_time_str = kwargs['execution_date'].strftime(time_format)
    print('execution_time: ', execution_time_str)

    next_execution_time_str = kwargs['next_execution_date'].strftime(time_format)
    print('next_execution_time: ', next_execution_time_str)

    params = {
        'time_from': execution_time_str,
        'time_to': next_execution_time_str
    }
    response = requests.get('http://nginx:80/events', params)
    # Fail on an HTTP error instead of trying to load an error body.
    response.raise_for_status()
    # Decode the body once and reuse it for the print and the loop.
    events = response.json()
    print(events)

    pg_hook = PostgresHook(postgres_conn_id='pg_dw')

    # Values are bound as parameters rather than formatted into the SQL, so
    # quotes inside event data can neither break nor alter the statement.
    query = """
        INSERT INTO events_stg
            (event, time, unique_visitor_id, ha_user_id, browser, os,
             country_code)
        VALUES
            (%s, %s, NULLIF(%s, 'None'), NULLIF(%s, 'None'),
             NULLIF(%s, 'None'), NULLIF(%s, 'None'), NULLIF(%s, 'None'))
    """
    columns = ('event', 'time', 'unique_visitor_id', 'ha_user_id',
               'browser', 'os', 'country_code')

    for event in events:
        clean_event = process_event(event)
        # str() keeps the text rendering the formatted SQL used to produce
        # (None -> 'None', which NULLIF maps to NULL for nullable columns).
        row = tuple(str(clean_event[column]) for column in columns)
        pg_hook.run(query, parameters=row)
class PostgresOperator(BaseOperator):
    """
    Runs SQL against a Postgres database through ``PostgresHook``.

    :param postgres_conn_id: reference to a specific postgres database
    :type postgres_conn_id: string
    :param sql: the sql code to be executed
    :type sql: a str holding one sql statement, a list of str (several
        statements), or a reference to a template file. Template
        references are recognized by str ending in '.sql'
    :param autocommit: handed to ``PostgresHook.run`` as its second
        positional argument
    """

    template_fields = ('sql',)
    template_ext = ('.sql',)
    ui_color = '#ededed'

    @apply_defaults
    def __init__(
            self, sql,
            postgres_conn_id='postgres_default', autocommit=False,
            *args, **kwargs):
        super(PostgresOperator, self).__init__(*args, **kwargs)
        self.autocommit = autocommit
        self.postgres_conn_id = postgres_conn_id
        self.sql = sql

    def execute(self, context):
        """Log the statement(s) and run them on the configured connection."""
        statement_text = str(self.sql)
        logging.info('Executing: ' + statement_text)
        hook = PostgresHook(postgres_conn_id=self.postgres_conn_id)
        # Kept on the instance as well, exactly as before.
        self.hook = hook
        hook.run(self.sql, self.autocommit)
def create_import_table(**kwargs):
    """Create the ``trips`` and ``acs`` tables and load the ACS CSV.

    The connection id is read from ``kwargs['conn_id']``. Two scripts are
    run, one ``hook.run`` call each, in order: the ``trips`` DDL, then the
    ``acs`` DDL together with its COPY / ALTER / UPDATE statements.
    """
    trips_script = (
        " CREATE TABLE IF NOT EXISTS trips ( "
        "trip_id SERIAL8 PRIMARY KEY, city VARCHAR(16), duration_sec INT4, "
        "start_time TIMESTAMP, end_time TIMESTAMP, "
        "start_station_id FLOAT4, start_station_name VARCHAR(256), "
        "start_location_latitude FLOAT8, start_location_longitude FLOAT8, "
        "end_station_id FLOAT4, end_station_name VARCHAR(256), "
        "end_location_latitude FLOAT8, end_location_longitude FLOAT8, "
        "bike_id INT4, user_type VARCHAR(16), "
        "member_birth_year FLOAT4, member_gender VARCHAR(16) ); "
    )
    acs_script = (
        " CREATE TABLE IF NOT EXISTS acs ( "
        "fips VARCHAR(19) PRIMARY KEY, bach_edu FLOAT8, tot_trips FLOAT8, "
        "trips_u10 FLOAT8, trips_1014 FLOAT8, trips_1519 FLOAT8, "
        "med_age FLOAT8, med_age_m FLOAT8, med_age_f FLOAT8, "
        "pop_tot FLOAT8, race_white FLOAT8, race_black FLOAT8, asian FLOAT8, "
        "smartphone FLOAT8, car FLOAT8, transit FLOAT8, bike FLOAT8, walk FLOAT8, "
        "male_tot FLOAT8, fmale_tot FLOAT8, units_tot FLOAT8, "
        "owned FLOAT8, rented FLOAT8, med_hh_inc FLOAT8, med_rent FLOAT8 ); "
        "COPY acs FROM '/home/ubuntu/bikeiq/data/acs.csv' DELIMITER ',' CSV HEADER; "
        "ALTER TABLE acs ADD COLUMN bg_id VARCHAR(12); "
        "UPDATE acs SET bg_id=substring(fips, 8); "
    )

    hook = PostgresHook(kwargs.get('conn_id'))
    hook.run(trips_script)
    hook.run(acs_script)
def process_customers_order_dim(**kwargs):
    """Fetch distinct (cust_name, order_id) rows, print them and return them.

    The connection id is read from ``kwargs['conn_id']``.
    """
    query = (
        "select distinct customer.cust_name, order_info.order_id "
        "from customer, order_info "
        "where customer.customer_id=order_info.customer_id;"
    )
    rows = PostgresHook(kwargs.get('conn_id')).get_records(query)
    print(rows)
    return rows
def cache_latest_rates(ds, **kwargs):
    """Copy the newest rate of every pair from Postgres into Redis.

    Each pair becomes a Redis key whose value is its rate.
    """
    cache = redis.StrictRedis(host='redis')
    hook = PostgresHook(postgres_conn_id='rates')
    # DISTINCT ON + ORDER BY ... DESC keeps one row per pair: the one with
    # the greatest valid_until.
    query = (
        "SELECT DISTINCT ON (pair) pair, rate "
        "FROM rates "
        "ORDER BY pair, valid_until DESC;"
    )
    rows = hook.get_records(query)
    for pair, rate in rows:
        cache.set(pair, rate)
def cache_latest_stocks(ds, **kwargs):
    """Copy the newest price of every symbol from Postgres into Redis.

    Each symbol becomes a Redis key whose value is its price.
    """
    cache = redis.StrictRedis(host='redis')
    hook = PostgresHook(postgres_conn_id='stocks')
    # DISTINCT ON + ORDER BY ... DESC keeps one row per symbol: the one
    # with the greatest valid_until.
    query = (
        "SELECT DISTINCT ON (symbol) symbol, price "
        "FROM stocks "
        "ORDER BY symbol, valid_until DESC;"
    )
    rows = hook.get_records(query)
    for symbol, price in rows:
        cache.set(symbol, price)
def execute(self, context):
    """Optionally open the SSH tunnel, wait ten seconds, then run the SQL.

    The hook is created with ``schema=self.database`` and stored on
    ``self.hook``.
    """
    # NOTE(review): the source reached review with its indentation
    # collapsed; the print/sleep are assumed to sit outside the tunnel
    # check - confirm against the original file.
    logging.info('Executing: ' + str(self.sql))

    tunnel_requested = self.create_tunnel is True
    if tunnel_requested:
        self.create_ssh_tunnel()

    print("EXECUTING QUERY!")
    time.sleep(10)

    hook = PostgresHook(postgres_conn_id=self.postgres_conn_id,
                        schema=self.database)
    self.hook = hook
    hook.run(self.sql, self.autocommit, parameters=self.parameters)
def insert_table(conn_id, schema, src_table, dest_table):
    """
    Copy ``(relname, n_live_tup)`` rows from ``src_table`` into ``dest_table``.

    Only rows whose ``schemaname`` equals ``schema`` are read, and the row
    describing ``dest_table`` itself is excluded.

    :param conn_id: Postgres connection id used for both read and write
    :param schema: value compared against ``schemaname``
    :param src_table: relation to select from (formatted in as an identifier)
    :param dest_table: relation to insert into (formatted in as an identifier)
    """
    pg_hook = PostgresHook(postgres_conn_id=conn_id)

    # Identifiers cannot be bound, so only the table names are formatted in;
    # the compared values are passed as query parameters.
    select_cmd = (
        "SELECT relname, n_live_tup from {} "
        "where schemaname = %s and relname != %s;"
    ).format(src_table)
    insert_cmd = "INSERT INTO {} VALUES %s".format(dest_table)

    # One connection serves both steps and is always released.
    conn = pg_hook.get_conn()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(select_cmd, (schema, dest_table))
            records = cursor.fetchall()
            execute_values(cursor, insert_cmd, records)
        finally:
            cursor.close()
        conn.commit()
    finally:
        conn.close()
def get_event_status(**kwargs):
    """
    Return ``(task_id, dag_id, 'HH24:MI:SS' time)`` rows for 'failed' log
    events owned by 'airflow', read from ``public.log`` on the
    ``airflow_db`` connection.
    """
    # NOTE(review): ``current_date - interval '5 minutes'`` is five minutes
    # before midnight today, not five minutes before now - confirm whether
    # ``now()`` was intended.
    sql = """SELECT task_id, dag_id, to_Char(dttm, 'HH24:MI:SS')
             FROM public.log
             WHERE owner = 'airflow'
               AND event = 'failed'
               AND dttm >= current_date - interval '5 minutes'"""
    # get_records returns the fetchall() result and manages the cursor and
    # connection itself; the bare get_cursor() used before never closed them.
    return PostgresHook('airflow_db').get_records(sql)
def execute(self, context):
    """Run ``self.query`` and upload the resulting file object to S3.

    ``self.query_db`` produces the file object; it is rewound and uploaded,
    replacing any existing key. Returns ``True``.
    """
    pg = PostgresHook(postgres_conn_id=self.postgres_conn_id)
    s3 = S3Hook(aws_conn_id=self.s3_conn_id)

    payload = self.query_db(self.query, pg)
    # Rewind so the upload reads from the first byte.
    payload.seek(0)

    upload_options = {
        'key': "egress/sources.airflow.csv",
        'bucket_name': "demo-bucket-temp-977338899",
        'replace': True,
    }
    s3.load_file_obj(payload, **upload_options)
    return True
def load_data(ds, **kwargs):
    """
    Processes the json data, checks the types and enters into the Postgres
    database.

    Reads ``src/data/<today>.json`` next to this module, converts the
    temperatures from Kelvin to Celsius and inserts one row into
    ``weather_table``. Nothing is inserted when any numeric value is NaN.
    """
    pg_hook = PostgresHook(postgres_conn_id='weather_id')

    # Take the date once so the file name and the stored date cannot
    # disagree if the run straddles midnight.
    todays_date = datetime.now().date()
    file_name = str(todays_date) + '.json'
    tot_name = os.path.join(os.path.dirname(__file__), 'src/data', file_name)

    # open the json datafile and read it in
    with open(tot_name, 'r') as inputfile:
        doc = json.load(inputfile)

    # transform the data to the correct types and convert temp to celsius
    city = str(doc['name'])
    country = str(doc['sys']['country'])
    lat = float(doc['coord']['lat'])
    lon = float(doc['coord']['lon'])
    humid = float(doc['main']['humidity'])
    press = float(doc['main']['pressure'])
    min_temp = float(doc['main']['temp_min']) - 273.15
    max_temp = float(doc['main']['temp_max']) - 273.15
    temp = float(doc['main']['temp']) - 273.15
    weather = str(doc['weather'][0]['description'])

    # check for nan's in the numeric values and then enter into the database.
    # The earlier ``valid is False`` loop could never fire: np.isnan yields
    # numpy booleans (never identical to ``False``), and a True entry means
    # "is NaN", i.e. invalid.
    numeric_values = [lat, lon, humid, press, min_temp, max_temp, temp]
    if np.isnan(numeric_values).any():
        return

    row = (city, country, lat, lon, todays_date, humid, press, min_temp,
           max_temp, temp, weather)

    insert_cmd = """INSERT INTO weather_table
                    (city, country, latitude, longitude, todays_date,
                     humidity, pressure, min_temp, max_temp, temp, weather)
                    VALUES
                    (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"""

    pg_hook.run(insert_cmd, parameters=row)
class PostgresOperator(BaseOperator):
    """
    Runs SQL on Postgres, then checks that ``table`` holds at least one row.

    :param sql: the sql code to be executed (templated; '.sql' files are
        treated as templates)
    :param table: table counted by the data quality check
    :param postgres_conn_id: reference to a specific postgres database
    :param database: passed to ``PostgresHook`` as ``schema``
    :param autocommit: passed to ``PostgresHook.run`` as its second
        positional argument
    """

    template_fields = ('sql', )
    template_ext = ('.sql', )
    ui_color = '#ededed'

    @apply_defaults
    def __init__(self, sql, table="public.new",
                 postgres_conn_id='postgres_default', database=None,
                 autocommit=False, *args, **kwargs):
        super(PostgresOperator, self).__init__(*args, **kwargs)
        self.postgres_conn_id = postgres_conn_id
        self.autocommit = autocommit
        self.table = table
        self.database = database
        self.sql = sql

    def execute(self, context):
        """
        Run ``self.sql`` once, log server notices, then count ``self.table``.

        :raises ValueError: when the count query returns nothing or the
            table contains zero rows
        """
        self.log.info('Executing: %s', self.sql)
        self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id,
                                 schema=self.database)
        self.hook.run(self.sql, self.autocommit)
        for output in self.hook.conn.notices:
            self.log.info(output)

        # A leftover copy of the block above used to execute the SQL a
        # second time through a hook that ignored ``database``. The check
        # below now reuses the hook that ran the statement.
        records = self.hook.get_records(f"SELECT COUNT(*) FROM {self.table}")
        if len(records) < 1 or len(records[0]) < 1:
            raise ValueError(
                f"Data quality check failed. {self.table} returned no results")

        num_records = records[0][0]
        if num_records < 1:
            raise ValueError(
                f"Data quality check failed. {self.table} contained 0 rows")

        self.log.info(
            f'Data quality on table {self.table} check passed with {num_records} records'
        )
def create_table(conn_id, dest_table):
    """
    Drop ``dest_table`` if it exists and recreate it with the columns
    ``table_name VARCHAR(50)`` and ``row_count INTEGER``.

    Both statements run on one connection and are committed together.

    :param conn_id: Postgres connection id
    :param dest_table: table name, formatted into the DDL as an identifier
    """
    pg_hook = PostgresHook(postgres_conn_id=conn_id)

    delete_cmd = """
        DROP TABLE IF EXISTS {};
    """.format(dest_table)

    create_cmd = """
        CREATE table {} (
            table_name VARCHAR(50),
            row_count INTEGER
        )
    """.format(dest_table)

    # Cursor and connection are released even when a statement fails;
    # previously neither was ever closed.
    dest_conn = pg_hook.get_conn()
    try:
        cursor = dest_conn.cursor()
        try:
            cursor.execute(delete_cmd)
            cursor.execute(create_cmd)
        finally:
            cursor.close()
        dest_conn.commit()
    finally:
        dest_conn.close()
def api_json_to_db(**kwargs):
    """
    Load car-park bay data from JSON files into the ``tfl_data`` table.

    Files are read from the directory named by the ``tfl_park_base_dir``
    Variable. The leading digits of each file name are stored as the row
    timestamp. One row is produced per bay of every car park. Files that
    were loaded are removed afterwards; files without a numeric prefix are
    skipped and left in place.
    """
    base_dir = Variable.get("tfl_park_base_dir")

    json_files = [
        f for f in listdir(base_dir)
        if isfile(join(base_dir, f)) and '.json' in f
    ]

    # '+' rather than '*': the old pattern also matched the empty string,
    # so the "no match" branch never ran and such files were loaded with an
    # empty timestamp. Compiled once, outside the loop.
    time_pattern = re.compile('[0-9]+')

    db_tuples = []
    processed_files = []
    for current_file in json_files:
        pattern_match = time_pattern.match(current_file)
        if pattern_match is None:
            continue
        timestamp = pattern_match.group()

        with open(join(base_dir, current_file), 'r') as f:
            car_parks = json.load(f)

        for car_park in car_parks:
            park_id = car_park['id']
            park_name = car_park['name']
            for bay in car_park['bays']:
                db_tuples.append((
                    timestamp, park_id, park_name,
                    bay['bayType'], bay['occupied'], bay['free'],
                    bay['bayCount'],
                ))
        processed_files.append(current_file)

    target_fields = [
        't_stamp', 'park_id', 'park_name', 'park_bay_type',
        'park_bay_occupied', 'park_bay_free', 'park_bay_count'
    ]

    db_hook = PostgresHook(postgres_conn_id='tfl_db')
    db_hook.insert_rows('tfl_data', db_tuples, target_fields)

    # Delete only what was loaded, so skipped files are not lost.
    for current_file in processed_files:
        remove(join(base_dir, current_file))
def execute(self, context):
    """
    COPY data from S3 into ``self.table``, then stamp the new rows.

    After the COPY, rows whose LOAD_DATETIME and RECORD_SOURCE are both
    NULL are updated with ``self.load_datetime`` and ``self.record_source``.
    Both statements are committed together; the connection is always closed.
    """
    self.hook = PostgresHook(postgres_conn_id=self.redshift_conn_id)
    conn = self.hook.get_conn()
    log.info("Connected with " + self.redshift_conn_id)

    table = self.table
    load_datetime = self.load_datetime
    record_source = self.record_source

    # Fixed keyword typo: ENCODGIN -> ENCODING.
    # NOTE(review): confirm that Redshift accepts the encoding value quoted.
    copy_statement = f"""
        COPY {table}
        FROM '{self.s3_path}'
        IAM_ROLE '{self.s3_role}'
        REGION '{self.s3_region}'
        DELIMITER '{self.delimiter}'
        ENCODING '{self.encoding}'
    """

    # The two values are bound as parameters; the previous text ended each
    # literal with a doubled quote, which made the statement invalid.
    meta_statement = f"""
        UPDATE {table}
        SET LOAD_DATETIME = %s,
            RECORD_SOURCE = %s
        WHERE LOAD_DATETIME IS NULL
          AND RECORD_SOURCE IS NULL
    """

    try:
        cursor = conn.cursor()
        try:
            cursor.execute(copy_statement)
        finally:
            cursor.close()
        log.info("Loaded completed with load_datetime " + str(load_datetime))

        cursor = conn.cursor()
        try:
            cursor.execute(meta_statement, (load_datetime, record_source))
        finally:
            cursor.close()
        log.info("Metadata injection completed with load_datetime "
                 + str(load_datetime))

        conn.commit()
    finally:
        conn.close()
def populate_target(**kwargs):
    """Replace the rows of ``dim_sneakers`` on ``snql`` with staging data.

    The query comes from ``kwargs['templates_dict']['query']`` and is run
    on ``snql_staging``; the target table is emptied and then refilled with
    the fetched rows.
    """
    source = PostgresHook('snql_staging')
    # Query text is supplied through the task's templates_dict.
    staging_query = kwargs['templates_dict']['query']
    rows = source.get_records(staging_query)

    target = PostgresHook('snql')
    # Empty the target first, then load the new rows.
    target.run("""DELETE FROM dim_sneakers""")
    target.insert_rows('dim_sneakers', rows)
def execute(self, context):
    """
    Run ``self.sql`` once, log server notices, then count ``self.table``.

    :raises ValueError: when the count query returns nothing or the table
        contains zero rows
    """
    self.log.info('Executing: %s', self.sql)
    self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id,
                             schema=self.database)
    self.hook.run(self.sql, self.autocommit)
    for output in self.hook.conn.notices:
        self.log.info(output)

    # A leftover copy of the block above used to execute the SQL a second
    # time through a hook that ignored ``database``. The check below now
    # reuses the hook that ran the statement.
    records = self.hook.get_records(f"SELECT COUNT(*) FROM {self.table}")
    if len(records) < 1 or len(records[0]) < 1:
        raise ValueError(
            f"Data quality check failed. {self.table} returned no results")

    num_records = records[0][0]
    if num_records < 1:
        raise ValueError(
            f"Data quality check failed. {self.table} contained 0 rows")

    self.log.info(
        f'Data quality on table {self.table} check passed with {num_records} records'
    )
def create_staging():
    """Ensure ``dim_sneakers`` exists on ``snql_staging`` and empty it."""
    ddl = (
        "CREATE TABLE IF NOT EXISTS dim_sneakers( "
        "sneaker_id INT, sneaker_name VARCHAR, color VARCHAR, "
        "created_at TIMESTAMP, is_owned BOOLEAN, "
        "sold_at TIMESTAMP, trashed_at TIMESTAMP, given_at TIMESTAMP, "
        "manufacturer_name VARCHAR, collaborator_name VARCHAR, "
        "count_wears INT, count_cleans INT, count_walks INT, "
        "updated_at TIMESTAMP);"
    )
    # is_current BOOLEAN  (column left out of the DDL above)

    hook = PostgresHook('snql_staging')
    # Create first, then clear any rows left from an earlier run.
    for statement in (ddl, """DELETE FROM dim_sneakers"""):
        hook.run(statement)
class SSHPostgresOperator(PostgresOperator):
    """
    PostgresOperator variant that can open an SSH tunnel before running SQL.

    :param sql: the sql to run
    :param postgres_conn_id: Postgres connection id
    :param ssh_conn_id: connection id handed to ``AstroSSHHook``
    :param autocommit: passed to ``PostgresHook.run``
    :param parameters: passed to ``PostgresHook.run`` as ``parameters``
    :param database: passed to ``PostgresHook`` as ``schema``
    :param create_tunnel: the tunnel is opened only when this is ``True``
    """

    def __init__(self, sql, postgres_conn_id='postgres_default',
                 ssh_conn_id='ssh_default', autocommit=False,
                 parameters=None, database=None, create_tunnel=True,
                 *args, **kwargs):
        # super(PostgresOperator, ...) starts the lookup *after*
        # PostgresOperator, so PostgresOperator.__init__ itself is not run.
        super(PostgresOperator, self).__init__(*args, **kwargs)
        self.sql = sql
        self.postgres_conn_id = postgres_conn_id
        self.ssh_conn_id = ssh_conn_id
        self.database = database
        self.parameters = parameters
        self.autocommit = autocommit
        self.create_tunnel = create_tunnel

    def execute(self, context):
        """Optionally open the tunnel, wait ten seconds, then run the SQL."""
        # NOTE(review): the source reached review with its indentation
        # collapsed; the print/sleep are assumed to sit outside the tunnel
        # check - confirm against the original file.
        logging.info('Executing: ' + str(self.sql))

        tunnel_requested = self.create_tunnel is True
        if tunnel_requested:
            self.create_ssh_tunnel()

        print("EXECUTING QUERY!")
        time.sleep(10)

        hook = PostgresHook(postgres_conn_id=self.postgres_conn_id,
                            schema=self.database)
        self.hook = hook
        hook.run(self.sql, self.autocommit, parameters=self.parameters)

    def create_ssh_tunnel(self):
        """Print a marker and open the tunnel when ``create_tunnel`` is True."""
        print("CREATING TUNNEL")
        if self.create_tunnel is not True:
            return
        ssh_hook = AstroSSHHook(ssh_conn_id=self.ssh_conn_id)
        ssh_tunnel = ssh_hook.create_tunnel()
def pull_and_insert(**kwargs):
    """Copy query results from ``snql`` into ``dim_sneakers`` on staging.

    The query comes from ``kwargs['templates_dict']['query']``.
    """
    # Query text is supplied through the task's templates_dict.
    source_query = kwargs['templates_dict']['query']

    # Read from the snql connection ...
    rows = PostgresHook('snql').get_records(source_query)

    # ... and write the rows to the staging table.
    PostgresHook('snql_staging').insert_rows('dim_sneakers', rows)
def monthly_spent(TaskName, ds, prev_ds, **kwargs):
    """
    Sum the Facebook ad-account spend for a date range and store the total.

    The range runs from ``prev_ds`` to the day before ``ds``. Credentials
    come from the ``facebook_auth`` Variable. One ``(total, ds)`` row is
    inserted into ``monthly_cost``.

    :param TaskName: not used by this function
    :param ds: end date as 'YYYY-MM-DD'; also stored with the total
    :param prev_ds: start date passed to the insights query as ``since``
    """
    facebook = Variable.get("facebook_auth", deserialize_json=True)
    access_token = facebook["access_token"]
    ad_account_id = facebook["ad_account_id"]
    FacebookAdsApi.init(access_token=access_token)

    fromDate = prev_ds
    # The range ends the day before ``ds``.
    toDate = (datetime.strptime(ds, '%Y-%m-%d')
              + timedelta(days=-1)).strftime('%Y-%m-%d')

    fields = ['spend', 'account_id']
    params = {
        'level': 'account',
        'time_range': {'since': fromDate, 'until': toDate},
    }
    spendrows = AdAccount(ad_account_id).get_insights(
        fields=fields, params=params)

    # The 0.0 start keeps the result a float even with no rows.
    total = sum((float(row["spend"]) for row in spendrows), 0.0)
    row = (total, ds)

    # The earlier lookup of connection 'FB_json_conn' was never used, yet
    # could fail the task when that connection is missing; it was removed.
    connDB = PostgresHook(postgres_conn_id='airflow')
    scommand = "insert into monthly_cost values( %s ,%s) "
    connDB.run(scommand, parameters=row)
    print("done, Cost from " + prev_ds + " to " + ds)
def get_stocks(ds, **context):
    """
    Fetch prices for ``context['params']['symbol']`` over the
    ``alphavantage`` HTTP connection and insert them into ``stocks``.

    A price is skipped when it cannot be converted to float, is negative,
    or its key is not in ``stocks``.
    """
    symbol = context['params']['symbol']

    pg_hook = PostgresHook(postgres_conn_id='stocks')
    api_hook = HttpHook(http_conn_id='alphavantage', method='GET')

    # If either of these raises an exception then we'll be notified via
    # Airflow
    # NOTE(review): the API key is hard-coded in the URL - consider moving
    # it to the connection or a Variable.
    resp = api_hook.run(f'query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={symbol}&apikey=537201H9R203WT4C&datatype=csv')
    # NOTE(review): the request asks for datatype=csv but the body is
    # parsed as JSON - confirm which format is intended.
    resp = json.loads(resp.content)

    # These are the only valid stocks the DB supports at the moment. Anything
    # else that turns up will be ignored.
    stocks_insert = """INSERT INTO stocks (symbol, valid_until, price)
                       VALUES (%s, %s, %s);"""

    # If this raises an exception then we'll be notified via Airflow
    valid_until = datetime.fromtimestamp(resp['timestamp'])

    for iso2, price in resp['stocks'].items():
        # If converting the price to a float fails for whatever reason then
        # just move on. Only conversion failures are swallowed, so
        # unrelated errors (e.g. KeyboardInterrupt) still propagate.
        try:
            price = float(price)
        except (TypeError, ValueError, OverflowError):
            continue

        iso2 = iso2.upper().strip()

        # NOTE(review): ``stocks`` is not defined in this function; it is
        # presumably a module-level collection - verify it exists.
        if iso2 not in stocks or price < 0:
            continue

        pg_hook.run(stocks_insert, parameters=(iso2, valid_until, price))
def get_news_articles(**kwargs):
    '''
    Python callable for article scraping task. This will get executed
    after news metadata has been collected and validated for the current
    execution period. Articles will be written to file.

    URLs are read from ``kwargs['urls']``; a missing or empty value means
    nothing is downloaded. Article text goes to ``test_<timestamp>.csv``
    and failures (error message plus URL) to
    ``failed_test_<timestamp>.csv``, both in the working directory.
    '''
    # TODO:
    # Decide how to store articles in FS (method and schema)
    # Decide how to name log files
    # Decide whether to implement compression task

    # Disabled for now: load target URLs from Postgres for articles
    # published after the last run date of this task.
    #   pg_hook = PostgresHook(postgres_conn_id='sumzero')
    #   ds = kwargs.get('ds') or datetime.now().isoformat()
    #   query_results = pg_hook.get_records(
    #       "SELECT URL FROM news_meta WHERE published_on > %s;",
    #       parameters=[ds])
    #   urls = [record[0] for record in query_results]
    urls = kwargs.get('urls') or []

    # Download articles and write them to a csv.
    time_str = datetime.now().isoformat()
    log_fname = 'test_{}.csv'.format(time_str)
    failed_log_fname = 'failed_test_{}.csv'.format(time_str)

    # ``with`` closes (and flushes) both files even when a download raises
    # something unexpected; previously they were never closed.
    with open(log_fname, 'w+') as log_file, \
            open(failed_log_fname, 'w+') as failed_log:
        log_writer = csv.writer(log_file, delimiter=',', quotechar='|')
        failed_writer = csv.writer(failed_log, delimiter=',', quotechar='|')

        for url in urls:
            loader = Article(url)
            try:
                loader.download()
                loader.parse()
                log_writer.writerow((time_str, loader.text))
            except Exception as error:
                # Best effort per URL: record the failure and keep going.
                failed_writer.writerow((time_str, str(error), url))
def redshift_call(sql, type_='run'):
    """Send ``sql`` to the ``naveen_redshift`` connection.

    :param sql: statement to send
    :param type_: ``'run'`` executes via ``hook.run``; any other value
        fetches rows via ``hook.get_records``
    :return: whatever the selected hook method returns
    """
    hook = PostgresHook(postgres_conn_id="naveen_redshift")
    action = hook.run if type_ == 'run' else hook.get_records
    return action(sql)
def execute(self, context):
    """Log the statement(s) and run them with the configured parameters.

    The hook is stored on ``self.hook``.
    """
    statement_text = str(self.sql)
    logging.info('Executing: ' + statement_text)

    hook = PostgresHook(postgres_conn_id=self.postgres_conn_id)
    self.hook = hook
    hook.run(self.sql, self.autocommit, parameters=self.parameters)
def execute(self, context):
    """Log the statement, then run it through a new ``PostgresHook``.

    The hook is stored on ``self.hook``.
    """
    # Lazy %-formatting: concatenating with ``+`` raised TypeError whenever
    # ``sql`` was not a plain string.
    logging.info('Executing: %s', self.sql)
    self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id)
    self.hook.run(self.sql, self.autocommit)
def postgres_call(sql):
    """Fetch the rows ``sql`` returns on ``naveen_ngrok_postgres``."""
    hook = PostgresHook(postgres_conn_id="naveen_ngrok_postgres")
    rows = hook.get_records(sql)
    return rows