def main():
    """Fetch banner-tracking GA data for the last 4 days and load it into SQL Server.

    For each day: pull the 'events' report, normalize column names/values,
    then replace that day's rows in GA_Banner_Tracking (DELETE + append).
    Relies on module-level ga_engine, banner, cursor, conalch, VIEW_ID,
    TABLE_NAME and db_log_error.
    """
    # GA auth is flaky; retry once after 30s before giving up.
    try:
        analytics = ga_engine.initialize_analyticsreporting('web')
    except Exception:
        time.sleep(30)
        try:
            analytics = ga_engine.initialize_analyticsreporting('web')
        except Exception as e:
            db_log_error.log_error('Python_banners', e)
            return  # FIX: without a client, every fetch below would NameError

    for i in range(4):
        step_time = (datetime.datetime.now().date() +
                     relativedelta(days=-i)).strftime('%Y-%m-%d')
        try:
            data = banner.fetch_data(VIEW_ID, analytics, step_time, 'events')
            data.columns = ['date', 'new_customers', 'ctr', 'clicks',
                            'promotion_creative', 'promotion_id',
                            'promotion_name', 'promotion_position',
                            'impression', 'GMV', 'orders']
            data['date'] = pd.to_datetime(data['date'])
            # GA reports '(not set)' for missing promotion fields; store NULL.
            for col in ('promotion_name', 'promotion_creative', 'promotion_id'):
                data[col].replace('(not set)', sqlalchemy.sql.null(), inplace=True)
            data.rename(columns={'promotion_name': 'promotionName',
                                 'promotion_creative': 'promotionCreative',
                                 'promotion_id': 'promotionID',
                                 'promotion_position': 'promotionPosition',
                                 'new_customers': 'newCustomers'},
                        inplace=True)
            ordered_cols = ['date', 'promotionID', 'promotionCreative',
                            'promotionName', 'promotionPosition', 'newCustomers',
                            'impression', 'clicks', 'orders', 'ctr', 'GMV']
            data['GMV'] = round(data['GMV'], 10)
            data['ctr'] = round(data['ctr'], 10)
            data = data[ordered_cols]
            # Positions containing a double quote break downstream consumers.
            data = data[~data["promotionPosition"].str.contains('"')]
            data['promotionName'] = data['promotionName'].str.strip().str.slice(0, 200 - 10)
            data['promotionPosition'] = data['promotionPosition'].str.strip().str.slice(0, 200 - 10)
        except Exception as e:
            db_log_error.log_error('Python_banners', e)
            # BUG FIX: the insert block previously ran even when the fetch or
            # transform failed, deleting the day's rows and re-inserting a
            # stale DataFrame from an earlier iteration.
            continue
        try:
            # Parameterized DELETE (was string-concatenated SQL).
            cursor.execute(
                "DELETE FROM [DB_Marketing].dbo.GA_Banner_Tracking WHERE date = ?",
                step_time)
            cursor.commit()
            data.to_sql(TABLE_NAME, conalch, method="multi",
                        if_exists='append', index=False, chunksize=10)
            time.sleep(1)
            print('done ' + str(step_time))
        except Exception as e:
            db_log_error.log_error('Python_banners', e)
def main():
    """Backfill FAQ pageview stats from GA into SQL Server, one day at a time.

    Walks from the last validated date up to yesterday; each day's report is
    bulk-inserted and the outcome (Ack/Nack) is logged to Elasticsearch.
    """
    analytics = ga_engine.initialize_analyticsreporting('web')
    today = datetime.datetime.now().date()
    start = validation(analytics)
    for offset in range((today - start).days - 1):
        day = (start + relativedelta(days=+offset)).strftime('%Y-%m-%d')
        frame = faq.fetch_data(VIEW_ID, analytics, day, 'pageview')
        frame.columns = ['date', 'pagePath', 'pageViews']
        # Trim paths to the destination column width.
        frame['pagePath'] = frame['pagePath'].str.slice(0, 300)
        frame['date'] = pd.to_datetime(frame['date'])
        try:
            cursor.fast_executemany = True
            insert_stmt = '''INSERT INTO [{}].[dbo].[{}] ([date],[pagePath], [pageViews]) VALUES (?,?,?)'''.format(DB_NAME, TABLE_NAME)
            cursor.executemany(insert_stmt, frame.values.tolist())
            cursor.commit()
            n_rows = len(frame.index)
            doc = logger.create_log('Insert', 'Ack', day, socket.gethostname(),
                                    'Successful Insert',
                                    server_len=n_rows, database_len=n_rows)
            es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
        except Exception as e:
            doc = logger.create_log('Insert', 'Nack', day,
                                    socket.gethostname(), str(e))
            es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
        time.sleep(2)
def main():
    """Print GA carousel performance reports per page-type pattern.

    Covers six days (offsets 3..8 days back).  The database insert is
    currently disabled; each cleaned frame is only printed.  Any failure in
    the output stage is logged to Elasticsearch as a Nack.
    """
    analytics = ga_engine.initialize_analyticsreporting('web')
    page_patterns = ['BRAND', 'CMP', 'HOME', 'LANDING', 'PDP', 'PLP',
                     'PROFILE', 'SEARCH', 'INCREDIBLE', 'THANKYOU']
    for offset in range(6):
        report_day = today_date + relativedelta(days=-offset - 3)
        day_str = report_day.strftime('%Y-%m-%d')
        for pattern in page_patterns:
            frame = carousel.fetch_data(view_id, analytics, day_str, pattern)
            if frame.empty:
                time.sleep(2)
                continue
            frame.columns = ['date', 'pagepath', 'product_addtocarts',
                             'carousel_clicks', 'carousel_name',
                             'carousel_revenue', 'product_uniquepurchases']
            frame['pagepath'] = frame['pagepath'].map(lambda p: p.replace('?', '/'))
            frame['date'] = pd.to_datetime(frame['date'])
            frame['source'] = data_type
            frame = frame[['date', 'source', 'carousel_name', 'carousel_clicks',
                           'product_addtocarts', 'product_uniquepurchases',
                           'carousel_revenue']]
            frame['carousel_name'] = frame['carousel_name'].str.strip().str.slice(0, 200 - 10)
            try:
                print(frame)
                # NOTE: the INSERT into the destination table (and its
                # Elasticsearch Ack logging) is intentionally disabled here;
                # this run only prints the frame.
                print('done' + ' ' + str(report_day) + '**' + str(pattern) + ' for ' + data_type)
                time.sleep(2)
            except Exception as e:
                doc = logger.create_log('Insert', 'Nack', report_day,
                                        socket.gethostname(), str(e))
                es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
def main():
    """Refresh the period user counts in GA_DailyUsersByPeriod.

    For each of the last 5 reference dates, reads the period boundary
    columns (current month, last month, last year, current day) back from
    the table, queries GA for the user count over each range, and writes the
    result into the matching *_Users column.  Failures are recorded via the
    Usp_Insert_ErrorLog stored procedure.  Relies on module-level ga_engine,
    daily_users, cursor, view_id, today_date and app_type.
    """

    def _log_failure(err):
        # FIX: escape single quotes in the error text; an unescaped quote in
        # the GA exception message used to terminate the SQL string literal
        # and break the error-logging call itself.
        safe_msg = str(err).replace("'", "''")
        exec_sp = "EXEC [DB_DBA].[log].[Usp_Insert_ErrorLog] 'daily_users_data' , 'bimarketing' , '{}' , '{}' , NULL , NULL , NULL , 'Python' ,NULL , '[email protected];[email protected]' , '09384149786' , 0 , 0 ".format(
            datetime.datetime.now().replace(microsecond=0), safe_msg)
        cursor.execute(exec_sp)
        cursor.commit()

    try:
        analytics = ga_engine.initialize_analyticsreporting('web')
    except Exception as e:
        _log_failure(e)
        return  # FIX: without a GA client every iteration below raised NameError

    # (target column, period-start column, period-end column) — replaces the
    # original j==0/1/2 ternary chains.
    periods = (
        ('CM_Users', 'CurrentMonthStart', 'CurrentMonthEnd'),
        ('LM_Users', 'LastMonthStart', 'LastMonthEnd'),
        ('LY_Users', 'LastYearStart', 'LastYearEnd'),
        ('CD_Users', 'CurrentDate', 'CurrentDate'),
    )
    for i in range(5):
        try:
            step_time = today_date + relativedelta(days=-i - 2)
            # Parameterized lookup (was string-formatted SQL).
            cursor.execute(
                "SELECT Id FROM [DB_Marketing].[dbo].[GA_DailyUsersByPeriod] "
                "WHERE CurrentDate = ? AND AppType = ?", step_time, app_type)
            for row in cursor.fetchall():
                selected_id = row.Id
            for selected_value, start_value, end_value in periods:
                # Column names come from the static table above, not user input.
                cursor.execute(
                    "SELECT {} Value FROM [DB_Marketing].[dbo].[GA_DailyUsersByPeriod] WHERE Id = {}".format(
                        start_value, selected_id))
                for row in cursor.fetchall():
                    start_date = datetime.datetime.strftime(row.Value, '%Y-%m-%d')
                cursor.execute(
                    "SELECT {} Value FROM [DB_Marketing].[dbo].[GA_DailyUsersByPeriod] WHERE Id = {}".format(
                        end_value, selected_id))
                for row in cursor.fetchall():
                    end_date = datetime.datetime.strftime(row.Value, '%Y-%m-%d')
                total_df = daily_users.get_report(view_id, analytics,
                                                  start_date, end_date)
                if total_df.empty:
                    time.sleep(2)
                    continue
                # The report is expected to hold a single aggregate row; the
                # last row wins if there are more.
                for index, row in total_df.iterrows():
                    to_update_value = int(row["ga:users"])
                # Value is sanitized via int() above.
                cursor.execute(
                    "UPDATE [DB_Marketing].[dbo].[GA_DailyUsersByPeriod] SET {} = {} WHERE Id = {}".format(
                        selected_value, to_update_value, selected_id))
                cursor.commit()
                print("Date:{} AppTYpe:{} Selected_Value:{} ==> Updated".format(
                    step_time, app_type, selected_value))
                time.sleep(2)
        except Exception as e:
            _log_failure(e)
def main():
    """Daily/monthly active-user loader.

    For each day since the last validated date, pulls four GA reports per
    category (daily, Gregorian month-to-date, Jalali month-to-date and a
    trailing 30-day window), joins them on category and bulk-inserts the
    combined frame, logging Ack/Nack to Elasticsearch.
    """
    analytics = ga_engine.initialize_analyticsreporting('web')
    today = datetime.datetime.now().date()
    start = validation(analytics)
    for offset in range((today - start).days - 1):
        current = start + relativedelta(days=+offset)
        current_str = current.strftime('%Y-%m-%d')
        # First day of the current Jalali month, expressed in Gregorian.
        jy, jm = jalali.Gregorian(current).persian_tuple()[0:2]
        jalali_month_start = jalali.Persian(jy, jm, 1).gregorian_datetime()
        daily = active_users.fetch_data_daily(VIEW_ID, analytics, current_str, 'web')
        daily.columns = ['date', 'category', 'sessions', 'dailyUsers']
        monthly = active_users.fetch_data_monthly(
            VIEW_ID, analytics, current.replace(day=1).strftime('%Y-%m-%d'),
            current_str, 'web')
        monthly.columns = ['category', 'month', 'monthlyUsers']
        jalali_monthly = active_users.fetch_data_custom_wrapper(
            VIEW_ID, analytics, jalali_month_start, current,
            'monthlyUsersJalali', 'web')
        trailing_30d = active_users.fetch_data_custom_wrapper(
            VIEW_ID, analytics, current + relativedelta(days=-29), current,
            '30DaysWindow', 'web')
        daily['date'] = pd.to_datetime(daily['date'])
        combined = daily
        for part in (monthly, jalali_monthly, trailing_30d):
            combined = combined.join(part.set_index('category'), on='category')
        combined.drop(['month'], axis=1, inplace=True)
        print(combined)
        try:
            cursor.fast_executemany = True
            insert_stmt = '''INSERT INTO [{}].[dbo].[{}] ([date],[category],[sessions],[dailyUsers],[monthlyUsers],[monthlyUsersJalali],[30DaysWindow]) VALUES (?,?,?,?,?,?,?)'''.format(DB_NAME, TABLE_NAME)
            cursor.executemany(insert_stmt, combined.values.tolist())
            cursor.commit()
            n_rows = len(combined.index)
            doc = logger.create_log('Insert', 'Ack', current, socket.gethostname(),
                                    'Successful Insert',
                                    server_len=n_rows, database_len=n_rows)
            es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
        except Exception as e:
            doc = logger.create_log('Insert', 'Nack', current,
                                    socket.gethostname(), str(e))
            es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
        time.sleep(2)
def main():
    """Load product-list trend data from GA into GA_DK_Trends for the last 5 days.

    Each day's rows are replaced (DELETE by date+source, then bulk insert).
    Relies on module-level ga_engine, trends, cursor, view_id, today_date,
    data_type, DB_NAME, TABLE_NAME and db_log_error.
    """
    analytics = ga_engine.initialize_analyticsreporting('web')
    for i in range(5):
        try:
            step_time = today_date + relativedelta(days=-i)
            total_df = trends.fetch_data(view_id, analytics,
                                         step_time.strftime('%Y-%m-%d'))
            if total_df.empty:
                time.sleep(2)
                continue
            total_df.columns = ['date', 'productListName',
                                'productRevenuePerPurchase', 'unique_purchase']
            total_df['source'] = data_type
            total_df['date'] = pd.to_datetime(total_df['date'])
            total_df['productListName'] = total_df['productListName'].str.strip()
            # Trim to the destination column width (with headroom).
            total_df['productListName'] = total_df['productListName'].str.slice(0, 200 - 10)
            total_df = total_df[['date', 'source', 'productListName',
                                 'productRevenuePerPurchase', 'unique_purchase']]
        except Exception as e:
            db_log_error.log_error('Python_dk_trends_data', e)
            # BUG FIX: the insert block previously ran even when the fetch or
            # transform failed, deleting the day's rows and re-inserting a
            # stale frame from an earlier iteration.
            continue
        try:
            # Parameterized DELETE (was string-formatted SQL).
            cursor.execute(
                "DELETE FROM [DB_Marketing].[dbo].[GA_DK_Trends] WHERE date = ? AND source = ?",
                str(step_time), data_type)
            cursor.commit()
            cursor.fast_executemany = True
            sql_comm = '''INSERT INTO [{}].[dbo].[{}]([date],[source],[productListName],[productRevenuePerPurchase],[unique_purchase]) VALUES (?,?,?,?,?)'''.format(DB_NAME, TABLE_NAME)
            cursor.executemany(sql_comm, total_df.values.tolist())
            cursor.commit()
            print('done' + ' ' + str(step_time))
            time.sleep(2)
        except Exception as e:
            db_log_error.log_error('Python_dk_trends_data', e)
def main():
    """Load per-supply-category pageview stats from GA into SQL Server.

    Walks each day from the last validated date up to yesterday, cleans the
    'events' report and appends it to TABLE_NAME, logging the outcome
    (Ack/Nack) to Elasticsearch.
    """
    analytics = ga_engine.initialize_analyticsreporting('web')
    today = datetime.datetime.now().date()
    start = validation(analytics)
    for offset in range((today - start).days - 1):
        day = (start + relativedelta(days=+offset)).strftime('%Y-%m-%d')
        frame = category.fetch_data(VIEW_ID, analytics, day, 'events')
        frame.columns = ['supply_category', 'date', 'page_view', 'unique_page_view']
        frame['date'] = pd.to_datetime(frame['date'])
        # Trim to the column width, then NULL-out GA's '(not set)' placeholder.
        frame['supply_category'] = frame['supply_category'].str.slice(0, 300 - 5)
        frame['supply_category'].replace('(not set)', sqlalchemy.sql.null(),
                                         inplace=True)
        frame.rename(columns={'supply_category': 'supplyCategory',
                              'page_view': 'pageView',
                              'unique_page_view': 'uniquePageView'},
                     inplace=True)
        try:
            frame.to_sql(TABLE_NAME, cnxn, method="multi", if_exists='append',
                         index=False, chunksize=10)
            n_rows = len(frame.index)
            doc = logger.create_log('Insert', 'Ack', day, socket.gethostname(),
                                    'Successful Insert',
                                    server_len=n_rows, database_len=n_rows)
            es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
        except Exception as e:
            doc = logger.create_log('Insert', 'Nack', day,
                                    socket.gethostname(), str(e))
            es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
def main():
    """Collect page→referrer hit data (GA custom dimension 5) per URL pattern
    and load it into SQL Server.

    For each day since the last validated date and each page pattern, the
    serialized hit payload in ga:dimension5 is exploded into columns, page
    and referrer paths are classified into types, and the result is appended
    to TABLE_NAME with Ack/Nack logging to Elasticsearch.
    Relies on module-level ga_engine, cart, path_parser, str_to_dict,
    validation, cursor/cnxn, es/logger/es_engine, VIEW_ID, TABLE_NAME, INDX.
    """
    analytics = ga_engine.initialize_analyticsreporting('web')
    limit_date = datetime.datetime.now().date()
    ref_date = validation(analytics)
    # ref_date = datetime.datetime.strptime('2019-06-05', '%Y-%m-%d').date()
    # URL patterns to fetch; 'outsource' (last item) is only a referrer type,
    # hence the ptrns[:-1] slice when fetching pages below.
    ptrns = [
        '/search/', '/promotion-page/', '/product-list/', '/cart/', '/brand/',
        '/dkp-', '/landing-page/', '/landings/', '/main/', '/profile/',
        'adro.co/', 'homepage', 'mobile-homepage', 'outsource'
    ]
    # Maps each pattern to the type label stored in pageType/referrerType.
    types = {
        '/search/': 'search', '/promotion-page/': 'promotion',
        '/product-list/': 'product-list', '/cart/': 'cart',
        '/brand/': 'brand', '/dkp-': 'product',
        '/landing-page/': 'landing-page', '/landings/': 'landings',
        '/main/': 'main', 'homepage': 'homepage',
        'mobile-homepage': 'mobile-homepage', '/profile/': 'profile',
        'adro.co/': 'adro', 'outsource': 'outsource'
    }
    for i in range((limit_date - ref_date).days - 1):
        step_time = (ref_date + relativedelta(days=+i)).strftime('%Y-%m-%d')
        for ptrn in ptrns[:-1]:
            print(ptrn)
            # Homepage patterns are matched by full URL, the rest by path fragment.
            if ptrn == 'homepage':
                data = cart.fetch_data(VIEW_ID, analytics, step_time,
                                       'https://www.digikala.com/')
            elif ptrn == 'mobile-homepage':
                data = cart.fetch_data(VIEW_ID, analytics, step_time,
                                       'https://mobile.digikala.com/')
            else:
                data = cart.fetch_data(VIEW_ID, analytics, step_time, ptrn)
            data.rename(columns={
                'ga:dimension5': 'total',
                'ga:date': 'date',
                'ga:hits': 'hits'
            }, inplace=True)
            # dimension5 carries a serialized dict; rows that fail to parse
            # become NaN and are dropped.
            data['total'] = data['total'].map(lambda x: str_to_dict(x))
            data = data.dropna(subset=['total'])
            # Explode the parsed dict into one column per key.
            attributes = data['total'].apply(pd.Series)
            data = data.join(attributes)
            data.drop(['total'], axis=1, inplace=True)
            data.rename(columns={
                'page-path': 'pagepath',
                'referrer-path': 'refpath'
            }, inplace=True)
            # eliminate hits due to the referrer data ...
            if ptrn == 'homepage':
                data = data.query(
                    'pagepath == "https://www.digikala.com/" or '
                    'pagepath == "https://www.digikala.com/?ref=nav_logo"')
            elif ptrn == 'mobile-homepage':
                data = data.query(
                    'pagepath == "https://mobile.digikala.com/" or '
                    'pagepath == "https://mobile.digikala.com/?ref=nav_logo"')
            else:
                data = data[data['pagepath'].str.contains(ptrn) == True]
            # Normalize the page path and derive its type label.
            data[['pagepath', 'pagetype']] = path_parser.column_pattern_retriever(
                data, 'pagepath', ptrn, types[ptrn])
            data['reftype'] = np.nan
            if data.empty:
                continue
            # Classify referrers: try every pattern (including 'outsource'),
            # writing matches back into data via DataFrame.update.
            for p in ptrns:
                if p == 'homepage' or p == 'mobile-homepage':
                    sub_data = data.query(
                        'refpath == "https://www.digikala.com/" or '
                        'refpath == "https://www.digikala.com/?ref=nav_logo" or '
                        'refpath == "https://mobile.digikala.com/?ref=nav_logo" or '
                        'refpath == "https://mobile.digikala.com/"')
                else:
                    sub_data = data[data['refpath'].str.contains(p) == True]
                if sub_data.empty:
                    continue
                sub_data[['refpath', 'reftype']] = path_parser.column_pattern_retriever(
                    sub_data, 'refpath', p, types[p])
                data.update(sub_data)
            # Collapse external search engines to a single token each.
            data['refpath'] = data['refpath'].map(
                lambda x: 'google' if x.startswith('https://www.google.') else
                ('bing' if x.startswith('https://www.bing.') else x))
            data['reftype'] = data.apply(
                lambda row: 'outsource' if row['refpath'] == 'google' or row[
                    'refpath'] == 'bing' else row['reftype'],
                axis=1)
            # Anything still unclassified becomes 'other' with no referrer kept.
            data['reftype'] = data.apply(
                lambda row: row['reftype']
                if pd.notnull(row['reftype']) else 'other',
                axis=1)
            data['refpath'] = data.apply(
                lambda row: np.nan
                if row['reftype'] == 'other' else row['refpath'],
                axis=1)
            # Zero / empty ids are meaningless — store NULL instead.
            data['cart-id'] = data['cart-id'].apply(lambda x: np.nan
                                                    if (x == 0 or x == '') else x)
            data['user-id'] = data['user-id'].apply(lambda x: np.nan
                                                    if (x == 0 or x == '') else x)
            data['variant-id'] = data['variant-id'].apply(
                lambda x: np.nan if (x == 0 or x == '') else x)
            data.rename(columns={
                'pagetype': 'pageType',
                'pagepath': 'pagePath',
                'reftype': 'referrerType',
                'refpath': 'referrer',
                'user-id': 'userID',
                'cart-id': 'cartID',
                'variant-id': 'variantID',
            }, inplace=True)
            # Trim to destination column widths (with headroom).
            data['pagePath'] = data['pagePath'].str.slice(0, 150 - 5)
            try:
                data['referrer'] = data['referrer'].str.slice(0, 150 - 5)
            except:
                # NOTE(review): presumably guards an all-NaN referrer column
                # (no .str accessor) — confirm before narrowing this except.
                pass
            data.loc[:, 'date'] = pd.to_datetime(data['date'])
            print(data.shape)
            try:
                data.to_sql(TABLE_NAME, cnxn, method="multi",
                            if_exists='append', index=False, chunksize=10)
                doc = logger.create_log('Insert', 'Ack', step_time,
                                        socket.gethostname(),
                                        'Successful Insert of {}'.format(ptrn),
                                        server_len=len(data.index),
                                        database_len=len(data.index))
                es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
            except Exception as e:
                doc = logger.create_log('Insert', 'Nack', step_time,
                                        socket.gethostname(),
                                        '{} ERROR: '.format(ptrn) + str(e))
                es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
            print('{} ... {} is Done!'.format(step_time, ptrn))
def main():
    """Load GA acquisition raw data for the last 5 days into GA_RawData_V2_test.

    Each day's rows for this view are replaced (DELETE by date+view, then
    bulk insert).  Relies on module-level ga_engine, rawdata, cursor,
    VIEW_ID, View_Name, DB_NAME, TABLE_NAME and db_log_error.
    """
    # GA auth is flaky; retry once after 30s before giving up.
    try:
        analytics = ga_engine.initialize_analyticsreporting('web')
    except Exception:
        time.sleep(30)
        try:
            analytics = ga_engine.initialize_analyticsreporting('web')
        except Exception as e:
            db_log_error.log_error('Python_dk_rawdata', e)
            return  # FIX: without a client, every fetch below would NameError

    for i in range(5):
        try:
            step_time = (datetime.datetime.now().date() +
                         relativedelta(days=-i)).strftime('%Y-%m-%d')
            # One retry for transient GA report failures.
            try:
                total_df = rawdata.fetch_data(VIEW_ID, analytics, step_time, 'trash')
            except Exception:
                time.sleep(30)
                total_df = rawdata.fetch_data(VIEW_ID, analytics, step_time, 'trash')
            if total_df.empty:
                time.sleep(2)
                print(str(step_time) + " is empty")
                continue
            # GA uses '(not set)' as a placeholder; store empty strings.
            total_df['ga:adContent'].replace('(not set)', '', inplace=True)
            total_df['ga:campaign'].replace('(not set)', '', inplace=True)
            total_df['ga:keyword'].replace('(not set)', '', inplace=True)
            total_df['view'] = View_Name
            total_df = total_df.rename(
                columns={
                    'ga:adContent': 'adContent',
                    'ga:campaign': 'campaign',
                    'ga:date': 'date',
                    'view': 'view',
                    'ga:deviceCategory': 'deviceCategory',
                    'ga:operatingSystem': 'OS',
                    'ga:transactions': 'goal12Completions',
                    'ga:keyword': 'keyword',
                    'ga:medium': 'medium',
                    'ga:sessions': 'sessions',
                    'ga:source': 'source',
                    'ga:users': 'users'
                })
            # BUG FIX: this list previously contained 'ga:goal12Completions',
            # which no longer exists after the rename above — the selection
            # raised KeyError on every run and the insert then received an
            # unfiltered frame.
            ordered_cols = [
                'adContent', 'campaign', 'date', 'view', 'deviceCategory',
                'OS', 'goal12Completions', 'keyword', 'medium', 'sessions',
                'source', 'users'
            ]
            total_df['date'] = pd.to_datetime(total_df['date'])
            total_df = total_df[ordered_cols]
            # Trim string fields to their SQL column widths (with headroom).
            for col, width in (('adContent', 500 - 10), ('campaign', 500 - 10),
                               ('deviceCategory', 100 - 10),
                               ('keyword', 500 - 10), ('medium', 100 - 10),
                               ('source', 100 - 10)):
                total_df[col] = total_df[col].str.strip().str.slice(0, width)
        except Exception as e:
            print(e)
            db_log_error.log_error('Python_dk_rawdata', e)
            # BUG FIX: the insert block previously ran even on failure,
            # deleting the day's rows and re-inserting a stale frame.
            continue
        try:
            # Parameterized DELETE (was string-concatenated SQL).
            cursor.execute(
                "DELETE FROM [DB_Marketing].[dbo].[GA_RawData_V2_test] "
                "WHERE date = ? and [view] = ?", step_time, View_Name)
            cursor.commit()
            cursor.fast_executemany = True
            sql_comm = '''INSERT INTO [{}].[dbo].[{}] ([adContent],[campaign],[date],[view],[deviceCategory],[OS],[goal12Completions],[keyword], [medium],[sessions],[source],[users]) VALUES (?,?,?,?,?,?,?,?,?,?,?,?)'''.format(
                DB_NAME, TABLE_NAME)
            cursor.executemany(sql_comm, total_df.values.tolist())
            cursor.commit()
            print("Done " + str(step_time) + 'at :' + str(datetime.datetime.now()))
        except Exception as e:
            db_log_error.log_error('Python_dk_rawdata', e)
def main():
    """Load per-page-type pageview stats (desktop site) into SQL Server.

    For each day since the last validated date and each URL pattern, the
    pagepath is normalized to a sub-category token, rows are grouped by
    (date, pageType, pagePath) and appended to TABLE_NAME, with Ack/Nack
    logging to Elasticsearch.  Relies on module-level ga_engine, pagepath,
    path_parser, validation, cnxn, es/logger/es_engine, VIEW_ID, TABLE_NAME.
    """
    analytics = ga_engine.initialize_analyticsreporting('web')
    limit_date = datetime.datetime.now().date()
    ref_date = validation()
    ptrns = [
        '/search/', '/promotion-page/', '/product-list/', '/cart/', '/brand/',
        '/dkp-', '/landing-page/', '/landings/', '/main/', 'homepage'
    ]
    # Maps each URL pattern to the pageType label stored in the table.
    types = {
        '/search/': 'search', '/promotion-page/': 'promotion',
        '/product-list/': 'product-list', '/cart/': 'cart',
        '/brand/': 'brand', '/dkp-': 'product',
        '/landing-page/': 'landing-page', '/landings/': 'landings',
        '/main/': 'main', 'homepage': 'home-page'
    }
    for i in range((limit_date - ref_date).days - 1):
        step_time = (ref_date + relativedelta(days=+i)).strftime('%Y-%m-%d')
        for ptrn in ptrns:
            data = pagepath.fetch_data(VIEW_ID, analytics, step_time, ptrn)
            if data.empty:
                continue
            data.columns = ['date', 'pagepath', 'pageview', 'unique_pageview']
            # Treat query strings as path segments so the split logic below
            # sees a uniform separator.
            data['pagepath'] = data['pagepath'].map(
                lambda x: x.replace('?', '/'))
            # Auth pages are out of scope.
            data = data[~data['pagepath'].str.contains('/users/register/')]
            data = data[~data['pagepath'].str.contains('/users/login/')]
            # backup — raw path kept for the fresh=1 check further down
            data['backup'] = data['pagepath']
            # distinguish compare & product
            if ptrn == '/dkp-':
                data['pagepath'] = data['pagepath'].map(
                    lambda x: 'compare' if x.startswith(
                        '/compare/dkp-') else path_parser.get_dkp(x))
            elif ptrn == 'homepage':
                # get logo data and append it to the homepage frame
                list_dfs = [data]
                list_dfs.append(
                    pagepath.fetch_data(VIEW_ID, analytics, step_time,
                                        'dk-logo'))
                if list_dfs[1].empty:
                    continue
                list_dfs[1].columns = [
                    'date', 'pagepath', 'pageview', 'unique_pageview'
                ]
                list_dfs[1]['pagepath'] = 'dk-logo'
                data = pd.concat(list_dfs)
            else:
                # Strip the leading slash and everything before the pattern.
                data['pagepath'] = data['pagepath'].map(
                    lambda x: ptrn[1:] + x.split(ptrn, 1)[-1])
            # Reduce each path to its sub-category token.
            # NOTE(review): the final fallback x.split('/', 2)[1] raises
            # IndexError for values without a '/' (e.g. the literal 'dk-logo'
            # assigned above) — confirm whether such rows ever reach here.
            special_subcats = lambda x: x.split('/',2)[1] if x.startswith('search/category-') \
                else ('search' if x.startswith('search/') \
                else ('cart' if x.startswith('cart/') else ('landing-page' if x.startswith('landing-page/') else x.split('/', 2)[1])))
            data['pagepath'] = data['pagepath'].map(special_subcats)
            data['pageType'] = types[ptrn]
            data['device'] = 'dk-desktop'
            if ptrn in ['/promotion-page/', '/product-list/']:
                # Fresh (grocery) pages are tagged via the fresh=1 query flag
                # preserved in the backup column.
                data['pageType'] = data.apply(
                    lambda x: 'fresh-' + x['pageType']
                    if 'fresh=1' in x['backup'] else x['pageType'],
                    axis=1)
            data.rename(columns={
                'pageview': 'pageView',
                'unique_pageview': 'uniquePageView',
                'pagepath': 'pagePath'
            }, inplace=True)
            ordered_cols = [
                'date', 'pageType', 'pagePath', 'pageView', 'uniquePageView'
            ]
            data = data[ordered_cols]
            # Trim to the destination column width (with headroom).
            data['pagePath'] = data['pagePath'].str.slice(0, 200 - 5)
            data.loc[:, 'date'] = pd.to_datetime(data['date'])
            data = data.groupby(['date', 'pageType',
                                 'pagePath']).sum().reset_index()
            try:
                data.to_sql(TABLE_NAME, cnxn, method="multi",
                            if_exists='append', index=False, chunksize=10)
                doc = logger.create_log('Insert', 'Ack', step_time,
                                        socket.gethostname(),
                                        'Successful Insert of {}'.format(ptrn),
                                        server_len=len(data.index),
                                        database_len=len(data.index))
                es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
            except Exception as e:
                doc = logger.create_log('Insert', 'Nack', step_time,
                                        socket.gethostname(),
                                        '{} ERROR: '.format(ptrn) + str(e))
                es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
            print('{} ... {} is Done!'.format(step_time, ptrn))
def main():
    """Load desktop-site (ds-web) GA raw acquisition data day by day.

    Iterates from the last validated date up to yesterday, normalizes the GA
    column names, trims string fields to their SQL column widths and
    bulk-inserts the rows, logging Ack/Nack to Elasticsearch.  A database
    error aborts the whole run (sys.exit) so the backfill can resume from
    the failed date.
    """
    analytics = ga_engine.initialize_analyticsreporting('ds-web')
    today = datetime.datetime.now().date()
    start = validation(analytics)
    for offset in range((today - start).days - 1):
        day = (start + relativedelta(days=+offset)).strftime('%Y-%m-%d')
        report = rawdata.fetch_data(VIEW_ID, analytics, day, 'trash')
        # GA uses '(not set)' as a placeholder; store empty strings instead.
        for ga_col in ('ga:adContent', 'ga:campaign', 'ga:keyword'):
            report[ga_col].replace('(not set)', '', inplace=True)
        report = report.rename(
            columns={
                'ga:adContent': 'adContent',
                'ga:campaign': 'campaign',
                'ga:date': 'date',
                'ga:deviceCategory': 'deviceCategory',
                'ga:transactions': 'goal12Completions',
                'ga:keyword': 'keyword',
                'ga:medium': 'medium',
                'ga:sessions': 'sessions',
                'ga:source': 'source',
                'ga:users': 'users'
            })
        report['date'] = pd.to_datetime(report['date'])
        # Trim string fields to their SQL column widths (with headroom).
        for col, width in (('adContent', 500 - 10), ('campaign', 500 - 10),
                           ('deviceCategory', 100 - 10), ('keyword', 500 - 10),
                           ('medium', 100 - 10), ('source', 100 - 10)):
            report[col] = report[col].str.strip().str.slice(0, width)
        try:
            cursor.fast_executemany = True
            insert_stmt = '''INSERT INTO [{}].[dbo].[{}] ([adContent],[campaign],[date],[deviceCategory],[goal12Completions],[keyword], [medium],[sessions],[source],[users]) VALUES (?,?,?,?,?,?,?,?,?,?)'''.format(
                DB_NAME, TABLE_NAME)
            cursor.executemany(insert_stmt, report.values.tolist())
            cursor.commit()
            n_rows = len(report.index)
            doc = logger.create_log('Insert', 'Ack', day, socket.gethostname(),
                                    'Successful Insert',
                                    server_len=n_rows, database_len=n_rows)
            es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
        except pyodbc.Error as e:
            doc = logger.create_log('Insert', 'Nack', day,
                                    socket.gethostname(), str(e))
            es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
            sys.exit()
def main():
    """Pull site-search metrics for the trailing week and refresh GA_DK_Search.

    Queries the GA reporting API for search exit/unique/refinement/result-view
    counts by date, deletes any existing rows for those dates, and appends
    the fresh data.
    """
    days = 7
    VIEW_ID_DM = '26751439'  # HELP: digikala.com view (desktop and mobileweb mixed)
    analytics = ga_engine.initialize_analyticsreporting('web')
    days_ago = str(days) + 'daysAgo'
    r_search = analytics.reports().batchGet(
        body={
            'reportRequests': [{
                'viewId': VIEW_ID_DM,
                'dateRanges': [{
                    'startDate': days_ago,
                    'endDate': 'today'
                }],
                'metrics': [{
                    'expression': 'ga:searchExits'
                }, {
                    'expression': 'ga:searchUniques'
                }, {
                    'expression': 'ga:searchRefinements'
                }, {
                    'expression': 'ga:searchResultViews'
                }, {
                    'expression': 'ga:searchSessions'
                }],
                'dimensions': [{
                    'name': 'ga:date'
                }],
            }]
        }).execute()
    # FIX: build all rows first and construct the frame once —
    # DataFrame.append in a loop is O(n^2) and was removed in pandas 2.0.
    rows = []
    for item in r_search['reports'][0]['data']['rows']:
        date_ = datetime.datetime.strptime(item['dimensions'][0],
                                           "%Y%m%d").date()
        values = item['metrics'][0]['values']
        # HELP: 1th: searchExits, 2nd: searchUniques, 3rd: searchRefinements,
        # 4th: searchResultViews, 5th: searchSessions
        rows.append({
            'search_date': date_,
            'search_exits': values[0],
            'search_uniques': values[1],
            'search_refinements': values[2],
            'search_result_views': values[3],
        })
    df = pd.DataFrame(rows, columns=[
        'search_date', 'search_exits', 'search_uniques', 'search_refinements',
        'search_result_views'
    ])
    # SECURITY NOTE: credentials are hard-coded in this DSN — move them to
    # configuration / environment variables.
    engine = sqlalchemy.create_engine(
        'mssql+pyodbc://Shopping@Marketing:FDF7D@[email protected]:1433/DB_Marketing?driver=ODBC+Driver+17+for+SQL+Server'
    )
    df['search_date'] = pd.to_datetime(df['search_date'])
    # BUG FIX: str(tuple) renders a single day as ('d',) whose trailing comma
    # is invalid T-SQL; build the IN list explicitly and skip when empty.
    day_list = df.search_date.dt.strftime('%Y-%m-%d').to_list()
    if day_list:
        in_list = ", ".join("'{}'".format(d) for d in day_list)
        engine.execute(
            "delete from GA_DK_Search where search_date in ({})".format(in_list))
    df.to_sql(name='GA_DK_Search', con=engine, if_exists='append',
              chunksize=None, index=False)
def main():
    """Load new-user acquisition data for fresh (grocery) pages into SQL Server.

    For each day since the last validated date and each URL pattern, GA
    landing-page/new-user rows are normalized, grouped, restricted to fresh
    supply categories (via MySQL lookups) and appended to TABLE_NAME.
    Insert failures are logged to Elasticsearch.
    """
    fresh_suply = pd.DataFrame(mysql_queries.get_fresh_supply_cat(0))
    main_cats = pd.DataFrame(mysql_queries.get_main_cats(0))
    main_cats = main_cats.loc[main_cats['code'] == 'food-beverage']
    # Search landing pages use 'category-<code>' slugs.
    fresh_suply['code'] = fresh_suply['code'].map(lambda x: 'category-' + x)
    analytics = ga_engine.initialize_analyticsreporting('web')
    limit_date = datetime.datetime.now().date()
    ref_date = validation()
    ptrns = ['/search/', '/promotion-page/', '/product-list/', '/dkp-', '/main/']
    # Maps each URL pattern to the page_type label stored in the table.
    types = {'/search/': 'search', '/promotion-page/': 'promotion',
             '/product-list/': 'product-list', '/cart/': 'cart',
             '/brand/': 'brand', '/dkp-': 'product',
             '/landing-page/': 'landing-page', '/landings/': 'landings',
             '/main/': 'main', 'homepage': 'home-page'}
    for i in range((limit_date - ref_date).days - 1):
        step_time = (ref_date + relativedelta(days=+i)).strftime('%Y-%m-%d')
        for ptrn in ptrns:
            data = users_sources.fetch_data(VIEW_ID, analytics, step_time, ptrn)
            if data.empty:
                continue
            data.columns = ['date', 'landingpage', 'medium', 'newusers', 'source']
            # Treat query strings as path segments for the split logic below.
            data['landingpage'] = data['landingpage'].map(
                lambda x: x.replace('?', '/'))
            # Auth pages are out of scope.
            data = data[~data['landingpage'].str.contains('/users/register/')]
            data = data[~data['landingpage'].str.contains('/users/login/')]
            # Raw path kept for the fresh=1 check below.
            data['backup'] = data['landingpage']
            if ptrn == '/dkp-':
                # distinguish compare & product pages
                data['landingpage'] = data['landingpage'].map(
                    lambda x: 'compare' if x.startswith('/compare/dkp-')
                    else path_parser.get_dkp(x))
            elif ptrn == 'homepage':
                # append the dk-logo click data to the homepage frame
                list_dfs = [data]
                list_dfs.append(
                    users_sources.fetch_data(VIEW_ID, analytics, step_time,
                                             'dk-logo'))
                if list_dfs[1].empty:
                    continue
                list_dfs[1].columns = ['date', 'landingpage', 'medium',
                                       'newusers', 'source']
                list_dfs[1]['landingpage'] = 'dk-logo'
                data = pd.concat(list_dfs)
            else:
                data['landingpage'] = data['landingpage'].map(
                    lambda x: ptrn[1:] + x.split(ptrn, 1)[-1])
            # Reduce each landing page to its sub-category token.
            special_subcats = lambda x: x.split('/', 2)[1] if x.startswith('search/category-') \
                else ('search' if x.startswith('search/')
                      else ('cart' if x.startswith('cart/')
                            else ('landing-page' if x.startswith('landing-page/')
                                  else x.split('/', 2)[1])))
            data['landingpage'] = data['landingpage'].map(special_subcats)
            data['pageType'] = types[ptrn]
            if ptrn in ['/promotion-page/', '/product-list/']:
                # Fresh (grocery) pages are tagged via the fresh=1 query flag.
                data['pageType'] = data.apply(
                    lambda x: 'fresh-' + x['pageType']
                    if 'fresh=1' in x['backup'] else x['pageType'],
                    axis=1)
            data.rename(columns={'newusers': 'new_users',
                                 'pageType': 'page_type',
                                 'landingpage': 'landingPage'}, inplace=True)
            ordered_cols = ['date', 'page_type', 'source', 'medium',
                            'landingPage', 'new_users']
            data = data[ordered_cols]
            # Trim to destination column widths (with headroom).
            data['landingPage'] = data['landingPage'].str.slice(0, 200 - 5)
            data['source'] = data['source'].str.slice(0, 200 - 5)
            # BUG FIX: this line previously wrote to a misspelled 'meidum'
            # column, leaving 'medium' untruncated and adding a stray column
            # that polluted the groupby and the to_sql payload.
            data['medium'] = data['medium'].str.slice(0, 50 - 5)
            data.loc[:, 'date'] = pd.to_datetime(data['date'])
            data = data.groupby(['date', 'page_type', 'landingPage', 'source',
                                 'medium']).sum().reset_index()
            fresh_suply_tmp = fresh_suply.copy()
            if ptrn == '/dkp-':
                # Keep only numeric product ids, then restrict to fresh products.
                data['landingPage'] = pd.to_numeric(data['landingPage'],
                                                    errors='coerce')
                data = data.dropna(subset=['landingPage'])
                data['landingPage'] = data['landingPage'].astype(int)
                data.rename(columns={'landingPage': 'product_id'}, inplace=True)
                outcome = data.merge(fresh_suply_tmp, how='inner',
                                     on=['product_id'])
                outcome.drop('code', axis=1, inplace=True)
                outcome.rename(columns={'product_id': 'code'}, inplace=True)
                outcome = outcome.drop_duplicates()
                outcome.drop('supply_cat', axis=1, inplace=True)
            elif ptrn == '/search/':
                # Match category slugs against fresh supply categories.
                fresh_suply_tmp.drop('product_id', axis=1, inplace=True)
                fresh_suply_tmp = fresh_suply_tmp.drop_duplicates()
                data.rename(columns={'landingPage': 'code'}, inplace=True)
                outcome = data.merge(fresh_suply_tmp, how='inner', on=['code'])
                outcome.drop('supply_cat', axis=1, inplace=True)
            elif ptrn == '/product-list/' or ptrn == '/promotion-page/':
                # Only fresh-tagged list/promotion pages are of interest.
                data = data[data['page_type'].str.startswith('fresh-')]
                data.rename(columns={'landingPage': 'code'}, inplace=True)
                outcome = data
            elif ptrn == '/main/':
                data.rename(columns={'landingPage': 'code'}, inplace=True)
                outcome = data.merge(main_cats, how='inner', on=['code'])
            try:
                with engine.connect() as conn, conn.begin():
                    outcome.to_sql(TABLE_NAME, conn, if_exists='append',
                                   index=False)
            except Exception as e:
                doc = logger.create_log('Insert', 'Nack', step_time,
                                        socket.gethostname(),
                                        '{} ERROR: '.format(ptrn) + str(e))
                es_engine.log_into_es(es, 'textlogs-{}'.format(INDX), doc)
def main():
    """Pull GA page-path stats for each URL pattern over the last 4 days and
    load them into [DB_NAME].[dbo].[TABLE_NAME] via batched pyodbc INSERTs.

    Side effects: GA Reporting API reads, SQL inserts, progress printed to
    stdout. Failed inserts are logged (printed) and the loop keeps going.
    """
    analytics = ga_engine.initialize_analyticsreporting('web')

    # URL patterns to query, and the pageType label each one maps to.
    ptrns = ['/search/', '/promotion-page/', '/product-list/', '/cart/',
             '/brand/', '/dkp-', '/landing-page/', '/landings/', '/main/',
             'homepage', '/incredible-offers', '/profile/', '/checkout',
             '/cash-on-delivery']
    types = {'/search/': 'search', '/promotion-page/': 'promotion',
             '/product-list/': 'product-list', '/cart/': 'cart',
             '/brand/': 'brand', '/dkp-': 'product',
             '/landing-page/': 'landing-page', '/landings/': 'landings',
             '/main/': 'main', 'homepage': 'home-page',
             '/incredible-offers': 'incredible offer', '/profile/': 'profile',
             '/checkout': 'thankyou page', '/cash-on-delivery': 'thankyou page'}
    # Patterns whose pagePath is blanked and later replaced by the pageType.
    blanked_ptrns = ('/incredible-offers', '/profile/', '/checkout',
                     '/cash-on-delivery')

    def special_subcats(path):
        """Collapse a rewritten page path to its sub-category token.

        FIX: the original lambda ended with an unconditional
        ``x.split('/', 2)[1]``, which raises IndexError for any path without
        a '/' — exactly what this script produces for the literal 'dk-logo'
        rows and the '' paths of the blanked patterns. Such paths are now
        returned unchanged.
        """
        if path.startswith('search/category-'):
            return path.split('/', 2)[1]
        if path.startswith('search/'):
            return 'search'
        if path.startswith('cart/'):
            return 'cart'
        if path.startswith('landing-page/'):
            return 'landing-page'
        parts = path.split('/', 2)
        return parts[1] if len(parts) > 1 else path

    for i in range(4):
        step_time = (datetime.datetime.now().date()
                     + relativedelta(days=-i)).strftime('%Y-%m-%d')
        for ptrn in ptrns:
            data = pagepath.fetch_data(VIEW_ID, analytics, step_time, ptrn)
            if data.empty:
                time.sleep(3)  # back off before the next GA request
                continue
            data.columns = ['date', 'pagepath', 'pageview', 'unique_pageview']
            # Fold the query string into the path, then drop auth pages.
            data['pagepath'] = data['pagepath'].map(lambda x: x.replace('?', '/'))
            data = data[~data['pagepath'].str.contains('/users/register/')]
            data = data[~data['pagepath'].str.contains('/users/login/')]
            # Keep the raw path: the fresh=1 check below needs it.
            data['backup'] = data['pagepath']

            if ptrn == '/dkp-':
                # Distinguish compare pages from plain product pages.
                data['pagepath'] = data['pagepath'].map(
                    lambda x: 'compare' if x.startswith('/compare/dkp-')
                    else path_parser.get_dkp(x))
            elif ptrn == 'homepage':
                # Append the dk-logo rows to the homepage rows.
                logo = pagepath.fetch_data(VIEW_ID, analytics, step_time, 'dk-logo')
                if logo.empty:
                    continue
                logo.columns = ['date', 'pagepath', 'pageview', 'unique_pageview']
                logo['pagepath'] = 'dk-logo'
                data = pd.concat([data, logo])
            elif ptrn in blanked_ptrns:
                data['pagepath'] = ''
            else:
                data['pagepath'] = data['pagepath'].map(
                    lambda x: ptrn[1:] + x.split(ptrn, 1)[-1])

            data['pagepath'] = data['pagepath'].map(special_subcats)
            data['pageType'] = types[ptrn]
            if ptrn in ['/promotion-page/', '/product-list/']:
                # fresh=1 in the original URL marks the "fresh" storefront.
                data['pageType'] = data.apply(
                    lambda x: 'fresh-' + x['pageType'] if 'fresh=1' in x['backup']
                    else x['pageType'], axis=1)
            elif ptrn in blanked_ptrns:
                data['pagepath'] = data['pageType']

            data.rename(columns={'pageview': 'pageView',
                                 'unique_pageview': 'uniquePageView',
                                 'pagepath': 'pagePath'}, inplace=True)
            data = data[['date', 'pageType', 'pagePath', 'pageView',
                         'uniquePageView']]
            # Truncate to the destination column width.
            data['pagePath'] = data['pagePath'].str.slice(0, 200 - 5)
            data.loc[:, 'date'] = pd.to_datetime(data['date'])
            data = data.groupby(['date', 'pageType', 'pagePath']).sum().reset_index()
            time.sleep(3)  # GA rate limiting between patterns

            try:
                cursor.fast_executemany = True
                sql_comm = ('INSERT INTO [{}].[dbo].[{}]'
                            '([date],[pageType],[pagePath],[pageView],[uniquePageView]) '
                            'VALUES (?,?,?,?,?)').format(DB_NAME, TABLE_NAME)
                cursor.executemany(sql_comm, data.values.tolist())
                cursor.commit()
            except Exception as e:
                # Best-effort: report and continue with the next pattern.
                print(e)
            print('{} ... {} is Done!'.format(step_time, ptrn))
def main():
    """Pull GA URL-filter stats for each URL pattern over the previous 3 days,
    replace that day's rows in GA_dk_url_filters_Data, and re-insert them in
    roughly 20 equal batches.

    Side effects: GA Reporting API reads, SQL DELETE+INSERT, progress printed
    to stdout. A failed batch is reported and the loop keeps going.
    """
    analytics = ga_engine.initialize_analyticsreporting('web')
    ptrns = ['/search/', '/promotion-page/', '/product-list/', '/brand/',
             '/landing-page/', '/incredible-offers', '/seller/']
    types = {'/search/': 'search', '/promotion-page/': 'promotion',
             '/product-list/': 'product-list', '/brand/': 'brand',
             '/landing-page/': 'landing-page',
             '/incredible-offers': 'incredible offer', '/seller/': 'seller'}
    ordered_cols = ['date', 'pageType', 'source_url', 'filter', 'value',
                    'pageView', 'uniquePageView']
    sql_comm = ('INSERT INTO [{}].[dbo].[{}]'
                '([date],[pageType],[source_url],[filter],[value],[pageView],[uniquePageView]) '
                'VALUES (?,?,?,?,?,?,?)').format(DB_NAME, TABLE_NAME)

    for i in range(3):
        # Days are offset by one: yesterday back to three days ago.
        step_time = (datetime.datetime.now().date()
                     + relativedelta(days=-i - 1)).strftime('%Y-%m-%d')
        for ptrn in ptrns:
            data = url_filters.fetch_data(VIEW_ID, analytics, step_time, ptrn)
            if data.empty:
                time.sleep(3)
                # FIX: was `break`, which silently skipped every remaining
                # pattern of the day as soon as one came back empty (the
                # sibling pagepath script uses `continue` here).
                continue
            data.columns = ['date', 'pagepath', 'pageView', 'uniquePageView']
            # Split "path?a=1&b=2" into the bare URL plus one row per k=v pair.
            data['url_filter'] = data['pagepath'].str.split('?').str[1]
            data['source_url'] = data['pagepath'].str.split('?').str[0]
            data = ga_engine.splitDataFrameList(data, 'url_filter', '&')
            kv = data['url_filter'].str.split('=', n=1, expand=True)
            data['filter'] = kv[0]
            data['value'] = kv[1]
            data['pageType'] = types[ptrn]
            data = data[ordered_cols]
            data = data[data['filter'] != 'q']  # drop the search-query param
            data = data[data['value'] != '']
            if types[ptrn] == 'search':
                data = data[data['source_url'].str.contains('search')]
            data['date'] = pd.to_datetime(data['date'])
            # Truncate to the destination column widths.
            data['source_url'] = data['source_url'].str.slice(0, 50 - 5)
            data['filter'] = data['filter'].str.slice(0, 20 - 5)
            data['value'] = data['value'].str.slice(0, 100 - 5)

            # Replace this day's rows for this pageType.
            # FIX: parameterized instead of string-concatenated SQL.
            cursor.execute(
                'DELETE FROM [DB_Marketing].[dbo].[GA_dk_url_filters_Data] '
                'WHERE date = ? and pageType = ?',
                str(step_time), str(types[ptrn]))
            cursor.commit()

            # Insert in roughly 20 equal batches.
            # FIX: the old id-window loop used `data['id'] > start_count`, so
            # the row with id 0 was never inserted, and with fewer than 20
            # rows the chunk size was 0 and the while-loop never terminated.
            chunk = max(1, len(data) // 20)
            for start in range(0, len(data), chunk):
                df = data.iloc[start:start + chunk][ordered_cols]
                try:
                    cursor.fast_executemany = True
                    cursor.executemany(sql_comm, df.values.tolist())
                    cursor.commit()
                except Exception as e:
                    print('{} ... {} is Not Done! because ==> {}'.format(
                        step_time, ptrn, e))
            print('{} ... {} is Done!'.format(step_time, ptrn))