Exemplo n.º 1
0
def save_audit_data_by_month(
        start_date,
        output_dir='/home/andrei/Python/sqlalchemy-lab/p2paid/audit_data2'):
    """Query one month of data starting at ``start_date`` and pickle the result.

    The module-level ``sql`` template is formatted with two ``YYYY-MM-DD``
    strings (``start_date`` and ``start_date`` plus one calendar month), run
    against the connection returned by
    ``DBConnectionsFacade.get_edusson_replica()``, and the resulting DataFrame
    is written to ``<output_dir>/<from>-<to>.<row count>.pkl``. A one-line
    summary (date range, row count, elapsed seconds) is printed afterwards.

    Args:
        start_date: First day of the window. Must support ``strftime`` and
            addition with ``relativedelta`` (presumably a ``date`` or
            ``datetime`` -- confirm against callers).
        output_dir: Directory that receives the pickle file. Defaults to the
            location that was previously hard-coded, so existing callers are
            unaffected. The directory is not created here.

    Returns:
        None.
    """
    from_date = start_date.strftime('%Y-%m-%d')
    to_date = (start_date + relativedelta(months=1)).strftime('%Y-%m-%d')

    stime = time()
    df = pd.read_sql(sql=sql.format(from_date, to_date),
                     con=DBConnectionsFacade.get_edusson_replica())

    # The row count is embedded in the file name and echoed in the log line.
    row_count = len(df)
    df.to_pickle('{}/{}-{}.{}.pkl'.format(output_dir, from_date, to_date,
                                          row_count))
    print("From {} to {} len: {}, time: {}".format(from_date, to_date,
                                                   row_count,
                                                   time() - stime))
Exemplo n.º 2
0
"""

# Query selecting every column from es_orders left-joined to es_product (on
# order_id) and es_product_type_essay (on product_id), restricted to rows with
# test_order = 0 and is_first_client_order = 1 whose order_id does not appear
# in es_order_preset.
sql = """
SELECT *
FROM es_orders o 
LEFT JOIN es_product p1 ON p1.order_id = o.order_id
LEFT JOIN es_product_type_essay pe1 ON p1.product_id = pe1.product_id
where o.test_order = 0 
and o.is_first_client_order = 1

and o.order_id not in (
    select distinct p.order_id from es_order_preset p where p.order_id is not null
)

"""
# `sql2` is defined outside the visible part of this file.
df_exclude = pd.read_sql(sql2, con=DBConnectionsFacade.get_edusson_replica())
df = pd.read_sql(sql, con=DBConnectionsFacade.get_edusson_replica())

# NOTE(review): bare expression -- the summary is computed and discarded when
# run as a script; presumably left over from interactive/notebook use.
df.is_paid_order.describe()

# Keep a copy of the query result as loaded.
stored_df = df.copy()
# df = df[['site_id', 'order_id', 'order_total_usd', 'pages', 'deadline', 'order_date', 'is_first_client_order', 'is_easy_bidding']].drop_duplicates()[~df.order_id.isin(df_exclude.order_id.values)]
#df = df.groupby(df.index).first()

# Accumulators for the column loop that follows: `cols` collects the new column
# names and `count` numbers each column called 'order_id' (the SELECT * over
# the joined tables can yield more than one such column).
cols = []
count = 1
for column in df.columns:
    if column == 'order_id':
        cols.append('order_id' + str(count))
        count += 1
        continue