def run1():
    """Count recharge rows per ``Flag`` for every pay date.

    Loads the records returned by ``date(url2)``, attaches the ``Flag`` that
    the desktop ``map.xlsx`` sheet assigns to each ``product_id`` (left
    join), and builds a table with one row per flag and one column per pay
    date holding the number of rows in that group.

    Returns:
        pandas.DataFrame: first column ``类型`` (the flag), then one count
        column per date; missing combinations are 0 and rows are sorted in
        descending order of the last date column.
    """
    df = date(url2)

    # Raw string: the previous literal depended on invalid escape sequences
    # (\A, \D, \m), which newer Python versions warn about. The runtime path
    # is unchanged.
    df_map = pd.read_excel(r'C:\Users\Administrator\Desktop\map.xlsx')
    df_map = df_map[['product_id', 'Flag']]

    # Keep only the text before the first space of pay_time
    df['time'] = df['pay_time'].apply(lambda x: str(x).split(' ')[0])

    # Join the mapping table onto the records
    df = pd.merge(left=df, right=df_map, on='product_id', how='left')

    df_out = pd.DataFrame()
    # Group by the scalar key rather than ['time']: a one-element list makes
    # pandas >= 2.0 yield 1-tuples, which would end up as column labels.
    for day, rows in df.groupby('time'):
        counts = pd.DataFrame(rows.groupby('Flag').size())
        counts.columns = [day]
        df_out = pd.concat([df_out, counts], axis=1)

    df_out.fillna(0, inplace=True)
    # With no input rows there is no date column to sort by
    if len(df_out.columns):
        df_out.sort_values(by=df_out.columns[-1], ascending=False,
                           inplace=True)
    df_out.reset_index(inplace=True)
    # The index column is called 'index' or 'Flag' depending on whether the
    # index name survived the concat; normalise both spellings.
    df_out.rename(columns={'index': '类型', 'Flag': '类型'}, inplace=True)

    print('\n第一个表运行完毕……')
    return df_out
# -*- coding: utf-8 -*-
# author:Super.Shen
import warnings

import pandas as pd

pd.set_option('display.max_rows', 1000)
pd.set_option('expand_frame_repr', False)

# Silence warnings before the project modules are imported
warnings.filterwarnings('ignore')

from build.database import url1, date, url7
from build.Func import or_path, gb

# Export both query results to Excel first ...
date(url7).to_excel(or_path('注册充值用户'))
date(url1).to_excel(or_path('变动日志'))

# ... then read the registered-and-recharged users back, aggregate them with
# gb() and normalise the id column name
df_reg = gb(
    pd.read_excel(or_path('注册充值用户')),
    '用户id',
    '充值金额',
)
df_reg.rename(columns={'用户id': '用户ID'}, inplace=True)

# Read the change log back
df = pd.read_excel(or_path('变动日志'))

# Parse the change time value by value and order the log by it
df['变动时间'] = df['变动时间'].apply(pd.to_datetime)
df.sort_values('变动时间', inplace=True)
def run2():
    """Build the one-row new-user share / next-day repeat summary.

    Loads payment rows from ``date(url2)`` and a user list from
    ``date(url8)``. Every user in that list is flagged ``'new'``, everyone
    else ``'old'``. Rows whose pay day equals ``bef_yesterday`` form the
    base day; rows whose pay day equals ``yesterday`` form the following
    day.

    Returns:
        pandas.DataFrame: a single row (index 0) with the base date, the
        new-user count and amount, their share of all users / all spend on
        the base day, and the count and amount of flagged-new users on the
        following day with their ratios to the base-day new-user figures.
        Ratios are strings formatted as ``'12.34%'``.
    """
    df = date(url2)
    # 2018-11-27 update (original note): new users registered the day
    # before yesterday
    df3 = date(url8)

    # Day of each payment: the text before the first space of pay_time
    df['day'] = df['pay_time'].apply(
        lambda x: pd.to_datetime(str(x).split(' ')[0]))

    # df2 holds the rows paid on `yesterday`, df the rows paid on
    # `bef_yesterday`
    df2 = df[df['day'] == pd.to_datetime(yesterday)]
    df = df[df['day'] == pd.to_datetime(bef_yesterday)]

    # Reduce the user list to (player_id, Flag='new')
    df3['Flag'] = 'new'
    df3.rename(columns={'用户id': 'player_id'}, inplace=True)
    df3 = df3[['player_id', 'Flag']]

    # Assign the filled column back instead of calling fillna(inplace=True)
    # on the column selection: that form is deprecated in pandas 2.x and
    # does not update the frame under Copy-on-Write.
    df = pd.merge(left=df, right=df3, on='player_id', how='left')
    df['Flag'] = df['Flag'].fillna('old')
    df2 = pd.merge(left=df2, right=df3, on='player_id', how='left')
    df2['Flag'] = df2['Flag'].fillna('old')

    i = 0
    df_form = pd.DataFrame()

    def pct(part, whole):
        # Ratio of two already-stored cells of the summary row, as 'xx.xx%'
        return '%.2f%%' % (df_form.loc[i, part] / df_form.loc[i, whole] * 100)

    new_rows = df[df['Flag'] == 'new']
    next_day_new_rows = df2[df2['Flag'] == 'new']

    df_form.loc[i, '日期'] = '{}'.format(bef_yesterday)

    # Head counts
    df_form.loc[i, '新用户量'] = len(new_rows['player_id'].unique())
    df_form.loc[i, '总用户量'] = len(df['player_id'].unique())
    df_form.loc[i, '新用户占比'] = pct('新用户量', '总用户量')

    # Amounts spent
    df_form.loc[i, '新用户消费金额'] = new_rows['amount'].sum()
    df_form.loc[i, '总消费'] = df['amount'].sum()
    df_form.loc[i, '新用户消费占比'] = pct('新用户消费金额', '总消费')

    # Flagged-new users who paid on the following day
    df_form.loc[i, '次日再消费用户量'] = len(
        next_day_new_rows['player_id'].unique())
    df_form.loc[i, '次日再消费人数比'] = pct('次日再消费用户量', '新用户量')

    # Amount those users spent on the following day
    df_form.loc[i, '次日再消费金额'] = next_day_new_rows['amount'].sum()
    df_form.loc[i, '次日再消费金额比'] = pct('次日再消费金额', '新用户消费金额')

    # The two totals were only needed as denominators
    del df_form['总用户量']
    del df_form['总消费']

    print('\n第二个表运行完毕……')
    return df_form
# -*- coding: utf-8 -*-
# author:Super.Shen
import warnings

import pandas as pd

pd.set_option('display.max_rows', 1000)
pd.set_option('expand_frame_repr', False)

# Silence warnings before the project modules are imported
warnings.filterwarnings('ignore')

from build.database import url11, date, url77
from build.Func import or_path, gb

# Export both query results to Excel first ...
date(url77).to_excel(or_path('注册充值用户2'))
date(url11).to_excel(or_path('变动日志2'))
# exit()

# ... then read the registered-and-recharged users back, aggregate them with
# gb() and normalise the id column name
df_reg = gb(
    pd.read_excel(or_path('注册充值用户2')),
    '用户id',
    '充值金额',
)
df_reg.rename(columns={'用户id': '用户ID'}, inplace=True)

# Read the change log back
df = pd.read_excel(or_path('变动日志2'))

# Parse the change time value by value
df['变动时间'] = df['变动时间'].apply(pd.to_datetime)
# -*- coding: utf-8 -*-
# author:Super.Shen
import pandas as pd
from Func import day
import warnings
from build.database import date, url4, yesterday

warnings.filterwarnings('ignore')

from Func import append_excel

# Base folder of the retention statistics. Raw strings replace the former
# literals, which depended on invalid escape sequences (\A, \D and a
# backslash before CJK characters); the runtime paths are unchanged.
_STATS_DIR = r'C:\Users\Administrator\Desktop\奇奇乐付费新用户留存统计'

# Export yesterday's recharge query result into the recharge folder.
# NOTE(review): this writes a .xls file; confirm the installed pandas still
# has a writer engine for that extension.
date(url4).to_excel(
    _STATS_DIR + r'\充值数据\{}.xls'.format(yesterday),
    index=False)

# Load the per-day files of each folder through append_excel
# (login / recharge / registration folders, per the folder names)
df = append_excel(_STATS_DIR + r'\登入数据')
df_cz = append_excel(_STATS_DIR + r'\充值数据')
df_zc = append_excel(_STATS_DIR + r'\注册数据')

# Recharge table: build the de-duplication key "<pay day>|<player_id>"
df_cz['time'] = df_cz['pay_time'].apply(
    lambda x: pd.to_datetime(str(x).split(' ')[0]))
df_cz['on'] = df_cz['time'].apply(str) + '|' + df_cz['player_id'].apply(str)

# Keep the first row for every (day, player) key
df_cz = df_cz.drop_duplicates(subset=['on'], keep='first')
def fx(day):
    """Write the three per-day Excel exports for ``day``.

    ``lz_3lc(day)`` supplies three query sources. Each is loaded through
    ``date`` and saved, without the index, as ``<day>.xlsx`` in its own
    sub-folder of ``path``: the first into ``充值数据``, the second (wrapped
    by ``Register``) into ``注册数据``, the third (wrapped by ``Login``)
    into ``登入数据``.

    Args:
        day: value passed to ``lz_3lc`` and formatted into the file names.
    """
    recharge_src, register_src, login_src = lz_3lc(day)

    recharge = date(recharge_src)
    recharge.to_excel(path + '充值数据\\{}.xlsx'.format(day), index=False)

    registered = Register(date(register_src))
    registered.to_excel(path + '注册数据\\{}.xlsx'.format(day), index=False)

    logged_in = Login(date(login_src))
    logged_in.to_excel(path + '登入数据\\{}.xlsx'.format(day), index=False)
pd.set_option('expand_frame_repr', False) pd.set_option('display.max_rows', 1000) import warnings warnings.filterwarnings('ignore') import datetime today = datetime.date.today() day_cut = int( str(pd.to_datetime(today) - pd.to_datetime('2018/11/14')).split(' ')[0]) df_all = pd.DataFrame() for x in range(day_cut, 0, -1): day = today - datetime.timedelta(days=x) df = date(lc_url(day)) df['time'] = day df_all = df_all.append(df) print('{}号抓取完毕!'.format(day)) df_all.reset_index(inplace=True) df_all = df_all[['time', 'counts', 'day1', 'day3', 'day7', 'day14', 'day30']] # 测试专用 # df_all.to_excel(or_path('TTT')) # df_all = pd.read_excel(or_path('TTT')) def more(form, col): form[col] = form[col].apply(lambda x: str(int(x))) + '(' + ( form[col] /
# -*- coding: utf-8 -*-
# author:Super.Shen
import pandas as pd
from Func import day
import warnings
from build.database import date, url4, yesterday, url5, Register, url6, Login

warnings.filterwarnings('ignore')

from Func import append_excel

# Base folder of this retention report. Raw strings replace the former
# literals, which depended on invalid escape sequences (\A, \D and a
# backslash before CJK characters); the runtime paths are unchanged.
_DATA_DIR = r'C:\Users\Administrator\Desktop\奇奇乐付费新用户留存1'

# Export yesterday's three query results, one file per folder
Register(date(url5)).to_excel(
    _DATA_DIR + r'\注册数据\{}.xlsx'.format(yesterday),
    index=False)

Login(date(url6)).to_excel(
    _DATA_DIR + r'\登入数据\{}.xlsx'.format(yesterday),
    index=False)

date(url4).to_excel(
    _DATA_DIR + r'\充值数据\{}.xlsx'.format(yesterday),
    index=False)

# Load the per-day files of each folder through append_excel
# (login / recharge / registration folders, per the folder names)
df = append_excel(_DATA_DIR + r'\登入数据')
df_cz = append_excel(_DATA_DIR + r'\充值数据')
df_zc = append_excel(_DATA_DIR + r'\注册数据')
from run4 import run4
from run5 import run5
from run6 import run6
from run7 import run7, run8, run9

# Run every report step; each call returns the frame(s) for one or more
# sheets
df1 = run1()
df2 = run2()
df3, df_3, df_3_3 = run3()
df4, df_4 = run4()
df5 = run5()
df6 = run6()
df7 = run7()
# df8 = run8()
df9 = run9()

# Two huishou() variants over the url3 data. The query is deliberately
# issued once per call, as before, because huishou() may modify its input.
df_hb = huishou(date(url3), 1)
df_yl = huishou(date(url3), 2)

# Export. Raw string: the former literal depended on invalid escape
# sequences (\A, \D); the runtime path is unchanged.
# NOTE(review): the writer is not saved/closed in this part of the script;
# confirm that happens after the remaining sheets are written.
writer = pd.ExcelWriter(
    r'C:\Users\Administrator\Desktop\run_奇奇乐_{}.xlsx'.format(yesterday))

df9.to_excel(writer, sheet_name='渠道', index=False)
df1.to_excel(writer, sheet_name='充值支付类型占比', index=False)
df2.to_excel(writer, sheet_name='新注册其次占比', index=False)

# Only the last two rows of each of the three run3() frames are exported
df3.tail(2).to_excel(writer, sheet_name='金币产出', index=False)
df_3.tail(2).to_excel(writer, sheet_name='金币消耗', index=False)
df_3_3.tail(2).to_excel(writer, sheet_name='金币系统赠送', index=False)
def run2():
    """Build the one-row new-user share / next-day repeat summary.

    Loads payment rows from ``date(url22)`` and a user list from
    ``date(url88)``. Every user in that list is flagged ``'new'``, everyone
    else ``'old'``. Rows whose pay day equals ``bef_yesterday`` form the
    base day; rows whose pay day equals ``yesterday`` form the following
    day.

    Returns:
        pandas.DataFrame: a single row (index 0) with the base date, the
        new-user count and amount, their share of all users / all spend on
        the base day, and the count and amount of flagged-new users on the
        following day with their ratios to the base-day new-user figures.
        Ratios are strings formatted as ``'12.34%'``.
    """
    # Load the data
    df = date(url22)
    df3 = date(url88)

    # Day of each payment: the text before the first space of pay_time
    df['day'] = df['pay_time'].apply(
        lambda x: pd.to_datetime(str(x).split(' ')[0]))

    # Disabled variant that shifts both days one day further back:
    # df2 = df[df['day'] == pd.to_datetime(bef_yesterday)]
    # df = df[df['day'] == pd.to_datetime(bb_yesterday)]

    # df2 holds the rows paid on `yesterday`, df the rows paid on
    # `bef_yesterday`
    df2 = df[df['day'] == pd.to_datetime(yesterday)]
    df = df[df['day'] == pd.to_datetime(bef_yesterday)]

    # Reduce the user list to (player_id, Flag='new')
    df3['Flag'] = 'new'
    df3.rename(columns={'用户id': 'player_id'}, inplace=True)
    df3 = df3[['player_id', 'Flag']]

    # Assign the filled column back instead of calling fillna(inplace=True)
    # on the column selection: that form is deprecated in pandas 2.x and
    # does not update the frame under Copy-on-Write.
    df = pd.merge(left=df, right=df3, on='player_id', how='left')
    df['Flag'] = df['Flag'].fillna('old')
    df2 = pd.merge(left=df2, right=df3, on='player_id', how='left')
    df2['Flag'] = df2['Flag'].fillna('old')

    # Debug export, disabled:
    # df.to_excel(r'C:\Users\Administrator\Desktop\NEW_T.xlsx', index=False)
    # exit()

    i = 0
    df_form = pd.DataFrame()

    def pct(part, whole):
        # Ratio of two already-stored cells of the summary row, as 'xx.xx%'
        return '%.2f%%' % (df_form.loc[i, part] / df_form.loc[i, whole] * 100)

    new_rows = df[df['Flag'] == 'new']
    next_day_new_rows = df2[df2['Flag'] == 'new']

    # df_form.loc[i, '平台'] = '奇奇乐'
    df_form.loc[i, '日期'] = '{}'.format(bef_yesterday)

    # Head counts
    df_form.loc[i, '新用户量'] = len(new_rows['player_id'].unique())
    df_form.loc[i, '总用户量'] = len(df['player_id'].unique())
    df_form.loc[i, '新用户占比'] = pct('新用户量', '总用户量')

    # Amounts spent
    df_form.loc[i, '新用户消费金额'] = new_rows['amount'].sum()
    df_form.loc[i, '总消费'] = df['amount'].sum()
    df_form.loc[i, '新用户消费占比'] = pct('新用户消费金额', '总消费')

    # Flagged-new users who paid on the following day
    df_form.loc[i, '次日再消费用户量'] = len(
        next_day_new_rows['player_id'].unique())
    df_form.loc[i, '次日再消费人数比'] = pct('次日再消费用户量', '新用户量')

    # Amount those users spent on the following day
    df_form.loc[i, '次日再消费金额'] = next_day_new_rows['amount'].sum()
    df_form.loc[i, '次日再消费金额比'] = pct('次日再消费金额', '新用户消费金额')

    # The two totals were only needed as denominators
    del df_form['总用户量']
    del df_form['总消费']

    print('\n第二个表运行完毕……')
    return df_form