import pandas as pd #accessing aws data import sys sys.path.append('../../utils') from db_utils import DBUtil #connect to aws dbu = DBUtil("jjill_redshift","/home/jjill/.databases.conf") #dbu = DBUtil("jjill_redshift","../../databases/database.conf") print 'get first query' query = """ with all_users as ( select ilink from jjill.jjill_keyed_data where department_name in ('Knit Tops','Woven Shirts','Dresses','Pants') and is_emailable_ind='Y' and order_date::date > '2018-01-01' and order_date::date < '2018-06-01' group by ilink order by ilink limit 3000 ), all_departments as ( select ( case when department_name not in('Woven Shirts','Dresses','Knit Tops','Pants') then 'Other_Dept' else department_name
# ---------------------------------------------------------------------------
# Numeric-feature extract.
#
# Runs one aggregate query against the "jjill_redshift" connection and pickles
# the resulting frame to ../data/numericFts_may2017_apr2018.pkl.
#
# The query returns one row per (ilink, department_name, month, year) for
# emailable customers in the four listed departments, with count / sum / avg /
# sample stddev / sample variance of shipped_sold_amt and discount.
#
# NOTE: all relative paths below are resolved against the current working
# directory, so the script has to be launched from the directory those paths
# were written for.
# ---------------------------------------------------------------------------

# accessing aws data
import sys

# db_utils is a project-local module; make its directory importable first.
sys.path.append('../utils')
from db_utils import DBUtil

# connect to aws
# Alternative config path (kept from the original, where it was commented out):
#   DBUtil("jjill_redshift", "/home/jjill/.databases.conf")
dbu = DBUtil("jjill_redshift", "../../databases/database.conf")

# Parenthesised form prints the same text on Python 2 and Python 3; the bare
# `print 'x'` statement is a SyntaxError on Python 3.
print('get first query')

# NOTE(review): `between '2017-05-01' and '2018-04-30'` compares against
# midnight of the end date. If order_date is a timestamp rather than a date,
# orders placed later on 2018-04-30 are excluded -- confirm the column type.
# `group by 1,2,3,4` refers to the first four select-list columns
# (ilink, department_name, month, year).
query = """
select
    ilink,
    department_name,
    date_part(mon,order_date) as month,
    date_part(yr,order_date) as year,
    count(*) as Total_Bought,
    sum(shipped_sold_amt) as shipped_sold_amt,
    avg(shipped_sold_amt) as avg_shipped_sold_amt,
    stddev_samp(shipped_sold_amt) as std_shipped_sold_amt,
    var_samp(shipped_sold_amt) as var_shipped_sold_amt,
    sum(discount) as discount,
    avg(discount) as avg_discount,
    stddev_samp(discount) as std_discount,
    var_samp(discount) as var_discount
from jjill.jjill_keyed_data
where is_emailable_ind='Y'
  and department_name in ('Woven Shirts','Knit Tops','Pants','Dresses')
  and order_date between '2017-05-01' and '2018-04-30'
group by 1,2,3,4
order by ilink;
"""

# The result is presumably a pandas DataFrame (to_pickle is called on it);
# verify against db_utils.DBUtil.get_df_from_query.
df = dbu.get_df_from_query(query)
df.to_pickle('../data/numericFts_may2017_apr2018.pkl')