Exemplo n.º 1
0
    def upload_to_redshift(self, file_name):
        """Load a CSV file from the ``amaro-bi`` S3 bucket into Redshift.

        The destination table is ``SCHEMA.ADSOURCE`` with every ``-`` in
        ``ADSOURCE`` swapped for ``_``. The table is created first when it
        is missing, then filled by a ``COPY`` that authenticates with the
        module-level S3 access/secret keys. A confirmation line is printed
        once both statements have been executed.

        Args:
            file_name: Key of the CSV inside ``s3://amaro-bi/``; the COPY
                skips its first row as a header.
        """
        destination = ".".join((SCHEMA, ADSOURCE.replace("-", "_")))

        # Step 1: make sure the destination table exists.
        create_statement = """
        CREATE TABLE IF NOT EXISTS
            {fn}
                 (id          INT IDENTITY(1,1),
                  date        DATE NOT NULL,
                  campaign    VARCHAR(256) NOT NULL,
                  adclicks    VARCHAR(256) NULL,
                  impressions VARCHAR(256) NULL,
                  adcost      FLOAT NULL,
                  updated_ts  TIMESTAMP NOT NULL
            );""".format(fn=destination)
        pr.exec_commit(create_statement)

        # Step 2: bulk-load the S3 object into that table.
        copy_arguments = {
            "fn": destination,
            "filepath": file_name,
            "acess_key": s3_credentials_AWS_ACCESS_KEY,
            "secret_key": s3_credentials_AWS_SECRET_KEY,
        }
        copy_statement = """
          COPY {fn}
          FROM 's3://amaro-bi/{filepath}'
          ACCEPTINVCHARS 
          delimiter ','
          ignoreheader 1
          csv quote as '"'
          dateformat 'auto'
          timeformat 'auto'
          region 'sa-east-1'
          access_key_id '{acess_key}'
          secret_access_key '{secret_key}';
          """.format(**copy_arguments)
        pr.exec_commit(copy_statement)

        print('Finished processing')
Exemplo n.º 2
0
def delete_from_date(date):
    """Remove rows of ``REDSHIFT_TABLE_NAME`` with ``date_sent >= date``.

    The DELETE statement is echoed to stdout and then executed through
    ``pr.exec_commit``. Always returns ``None``.

    Args:
        date: Lower bound, interpolated verbatim between single quotes in
            the WHERE clause.
    """
    delete_template = "DELETE FROM {table} WHERE date_sent >= '{datef}'"
    statement = delete_template.format(table=REDSHIFT_TABLE_NAME, datef=date)
    print("PRINT SQL STATEMENT: ", statement)
    pr.exec_commit(statement)
Exemplo n.º 3
0
def delete_from_date(date):
    """Remove survey rows with ``date_sent >= date``.

    Targets ``manual_data_sources.limesurvey_pedido_entregue_survey``. The
    DELETE statement is echoed to stdout and then executed through
    ``pr.exec_commit``. Always returns ``None``.

    Args:
        date: Lower bound, interpolated verbatim between single quotes in
            the WHERE clause.
    """
    statement = "DELETE FROM manual_data_sources.limesurvey_pedido_entregue_survey WHERE date_sent >= '{datef}'".format(datef=date)
    print("PRINT SQL STATEMENT: ", statement)
    pr.exec_commit(statement)
Exemplo n.º 4
0
def stops_to_durations():
    """Fill ``trip_durations`` for departure stops that have no rows yet.

    First selects every ``(data_frame_ref, stop_id)`` pair present in
    ``stop_events`` that has no matching ``(data_frame_ref,
    departure_stop_id)`` in ``trip_durations``, restricted to
    ``data_frame_ref`` values earlier than the current US/Pacific date.
    For each such pair, inserts one ``trip_durations`` row per later stop
    event of the same trip, with ``trip_duration`` computed as the
    difference of the two ``stop_time_unix`` values.

    Progress is printed per pair. The connection opened by
    ``connect_to_redshift`` is always closed with ``pr.close_up_shop``,
    including when one of the queries raises; the exception still
    propagates to the caller.
    """
    connect_to_redshift()

    # Everything after a successful connect runs under try/finally so the
    # connection is released even if a query fails part-way through.
    try:
        pending = pr.redshift_to_pandas("""select a.* from
        (select data_frame_ref, stop_id from stop_events group by data_frame_ref, stop_id) a
        left join
        (select data_frame_ref, departure_stop_id from trip_durations group by data_frame_ref, departure_stop_id) b
        on a.data_frame_ref = b.data_frame_ref
        	and a.stop_id = b.departure_stop_id
        where b.data_frame_ref is null
        	and b.departure_stop_id is null
            and a.data_frame_ref < trunc(convert_timezone('US/Pacific', GETDATE()))
            order by a.data_frame_ref, a.stop_id;""")

        total = len(pending)

        # enumerate() drives the "x of y" counter so it does not depend on
        # the DataFrame's index labels being 0..n-1.
        for position, (_, row) in enumerate(pending.iterrows(), start=1):
            data_frame_ref = row['data_frame_ref']
            dep_stop_id = row['stop_id']
            print("Processing data_frame_ref {}, departure_stop_id {} ({} of {})".
                  format(data_frame_ref, dep_stop_id, position, total))

            pr.exec_commit("""insert into trip_durations
            select a.data_frame_ref,
            	a.trip_id,
            	a.stop_id as departure_stop_id,
            	a.stop_time as departure_time,
            	a.stop_time_unix as departure_time_unix,
            	s.stop_id as arrival_stop_id,
            	s.stop_time as arrival_time,
            	s.stop_time_unix as arrival_time_unix,
            	s.stop_time_unix - a.stop_time_unix as trip_duration,
                date_trunc('hour', a.stop_time) as departure_time_hour
            from
            (select * from stop_events
            where data_frame_ref = '{}'
            and stop_id = {}) a
            join stop_events s
            on a.data_frame_ref = s.data_frame_ref
            and a.trip_id = s.trip_id
            and s.stop_time_unix > a.stop_time_unix""".format(
                data_frame_ref, dep_stop_id))
    finally:
        pr.close_up_shop()
Exemplo n.º 5
0
 def delete_from_date(self, app_path):
     """Remove ``manual_data_sources.rtb_ad_cost`` rows from the start date on.

     The lower bound is whatever ``self.get_start_date(app_path)`` returns;
     it is interpolated verbatim between single quotes in ``date >= '...'``.
     The DELETE statement is echoed to stdout and then executed through
     ``pr.exec_commit``. Always returns ``None``.
     """
     start_date = self.get_start_date(app_path)
     delete_template = "DELETE FROM manual_data_sources.rtb_ad_cost WHERE date >= '{datef}'"
     statement = delete_template.format(datef=start_date)
     print("PRINT SQL STATEMENT: ", statement)
     pr.exec_commit(statement)
Exemplo n.º 6
0
        table=REDSHIFT_TABLE_NAME, datef=date)
    print("PRINT SQL STATEMENT: ", query)
    pr.exec_commit(query)
    return None


# NOTE(review): delete_from_date is only partly visible in this excerpt; the
# call presumably clears rows dated 2018-01-01 onward -- confirm against its
# definition.
delete_from_date(date='2018-01-01')

print('5. Create table')
# Create the target table when it does not exist yet. Every column,
# including the date/timestamp-like ones, is declared as varchar(256).
pr.exec_commit("""
CREATE TABLE IF NOT EXISTS
    {table}
         (id_answer varchar(256),
         date_sent varchar(256),
         grade varchar(256),
         email varchar(256),
         order_number varchar(256),
         return_order_number varchar(256),
         language varchar(256),
         updated_ts varchar(256),
         return_channel varchar(256)
         );""".format(table=REDSHIFT_TABLE_NAME))

print('6. Upload to Redshift')
pr.exec_commit("""
  COPY {table}
  FROM 's3://amaro-bi/{filepath}'
  ACCEPTINVCHARS 
  delimiter ','
  ignoreheader 1
  csv quote as '"'