Exemplo n.º 1
0
def run1():
    """Count payment rows per ``Flag`` category for every payment date.

    Payment rows are loaded with ``date(url2)`` and joined (left join on
    ``product_id``) with the ``product_id`` -> ``Flag`` lookup read from the
    local ``map.xlsx`` workbook.

    Returns:
        pandas.DataFrame: one row per ``Flag`` value (held in a column named
        '类型') and one count column per payment date.  Missing combinations
        are filled with 0 and rows are sorted in descending order of the
        last date column.  When there is nothing to count, the frame has no
        date columns.
    """
    df = date(url2)
    # Raw string: the same path as before, but without the invalid "\A",
    # "\D" and "\m" escape sequences that newer Python versions warn about.
    df_map = pd.read_excel(r'C:\Users\Administrator\Desktop\map.xlsx')
    df_map = df_map[['product_id', 'Flag']]
    # Keep only the text before the first space of the payment timestamp.
    df['time'] = df['pay_time'].apply(lambda x: str(x).split(' ')[0])

    # Attach the lookup table.
    df = pd.merge(left=df, right=df_map, on='product_id', how='left')

    # Group by a scalar key (not a one-element list) so each group key is the
    # plain date string rather than a 1-tuple on pandas >= 2.
    daily_counts = []
    for day, day_rows in df.groupby('time'):
        counts = pd.DataFrame(day_rows.groupby('Flag').size())
        counts.columns = [day]
        daily_counts.append(counts)

    # Concatenate once instead of growing the frame inside the loop.
    df_out = pd.concat(daily_counts, axis=1) if daily_counts else pd.DataFrame()

    df_out.fillna(0, inplace=True)
    if len(df_out.columns):
        df_out.sort_values(by=df_out.columns[-1], ascending=False,
                           inplace=True)

    df_out.reset_index(inplace=True)
    # The index column is called 'Flag' or 'index' depending on whether the
    # index name survived; both are renamed to the report header.
    df_out.rename(columns={'index': '类型', 'Flag': '类型'}, inplace=True)

    print('\n第一个表运行完毕……')

    return df_out
# -*- coding: utf-8 -*-
# author:Super.Shen

import pandas as pd

# Print wide frames without wrapping and allow up to 1000 rows in the output.
pd.set_option('expand_frame_repr', False)
pd.set_option('display.max_rows', 1000)
import warnings

# Silence every warning for the rest of the script.
warnings.filterwarnings('ignore')

from build.database import url1, date, url7
from build.Func import or_path, gb

# Export the data: write both query results to Excel files.
# NOTE(review): or_path presumably maps a name to a workbook path — confirm in build.Func.
date(url7).to_excel(or_path('注册充值用户'))
date(url1).to_excel(or_path('变动日志'))

# Read back the newly registered users who recharged.
df_reg = pd.read_excel(or_path('注册充值用户'))
# NOTE(review): gb looks like a group-by on '用户id' aggregating '充值金额' — confirm in build.Func.
df_reg = gb(df_reg, '用户id', '充值金额')
df_reg.rename(columns={'用户id': '用户ID'}, inplace=True)

# Read back the change log.

df = pd.read_excel(or_path('变动日志'))

# Data analysis: convert each change time to a pandas timestamp, one value at a time.
df['变动时间'] = df['变动时间'].apply(lambda x: pd.to_datetime(x))

# Order the change log by change time, in place.
df.sort_values('变动时间', inplace=True)
Exemplo n.º 3
0
def run2():
    """Summarise new-user payments on ``bef_yesterday`` and their follow-up.

    Payment rows come from ``date(url2)`` and the list of newly registered
    users from ``date(url8)``.  Payers found in the registration list are
    flagged 'new'; every other payer is flagged 'old'.

    Returns:
        pandas.DataFrame: a single-row frame with the report date, the count
        and spend of flagged-new payers on ``bef_yesterday`` with their share
        of that day's totals, and the count and spend of flagged-new payers
        on ``yesterday`` relative to the ``bef_yesterday`` new-user figures.
        Shares are formatted as 'xx.xx%' strings.  The helper columns
        '总用户量' and '总消费' are removed before returning.
    """
    payments = date(url2)

    # Updated 2018-11-27: users who registered the day before yesterday.
    registered = date(url8)

    # Reduce every payment timestamp to its calendar day.
    payments['day'] = payments['pay_time'].apply(
        lambda x: pd.to_datetime(str(x).split(' ')[0]))

    # Payments made yesterday (follow-up day) and the day before (base day).
    next_day = payments[payments['day'] == pd.to_datetime(yesterday)]
    base_day = payments[payments['day'] == pd.to_datetime(bef_yesterday)]

    # Prepare the registration list as a player_id -> 'new' lookup.
    registered['Flag'] = 'new'
    registered.rename(columns={'用户id': 'player_id'}, inplace=True)
    # De-duplicate so a user listed more than once cannot multiply payment
    # rows (and therefore amounts) in the merges below.
    registered = registered[['player_id', 'Flag']].drop_duplicates('player_id')

    # Assign the filled column back instead of calling fillna(inplace=True)
    # on a column selection, which does not update the frame under pandas
    # Copy-on-Write.
    base_day = pd.merge(left=base_day, right=registered, on='player_id',
                        how='left')
    base_day['Flag'] = base_day['Flag'].fillna('old')

    next_day = pd.merge(left=next_day, right=registered, on='player_id',
                        how='left')
    next_day['Flag'] = next_day['Flag'].fillna('old')

    base_new = base_day[base_day['Flag'] == 'new']
    next_new = next_day[next_day['Flag'] == 'new']

    i = 0
    df_form = pd.DataFrame()

    def share(numerator_col, denominator_col):
        # Format numerator / denominator of the report row as a percentage.
        ratio = df_form.loc[i, numerator_col] / df_form.loc[i, denominator_col]
        return '%.2f%%' % (ratio * 100)

    df_form.loc[i, '日期'] = '{}'.format(bef_yesterday)

    # Head counts.
    df_form.loc[i, '新用户量'] = len(base_new['player_id'].unique())
    df_form.loc[i, '总用户量'] = len(base_day['player_id'].unique())
    df_form.loc[i, '新用户占比'] = share('新用户量', '总用户量')

    # Amounts spent.
    df_form.loc[i, '新用户消费金额'] = base_new['amount'].sum()
    df_form.loc[i, '总消费'] = base_day['amount'].sum()
    df_form.loc[i, '新用户消费占比'] = share('新用户消费金额', '总消费')

    # Flagged-new payers on the following day.
    df_form.loc[i, '次日再消费用户量'] = len(next_new['player_id'].unique())
    df_form.loc[i, '次日再消费人数比'] = share('次日再消费用户量', '新用户量')

    # Amount spent by flagged-new payers on the following day.
    df_form.loc[i, '次日再消费金额'] = next_new['amount'].sum()
    df_form.loc[i, '次日再消费金额比'] = share('次日再消费金额', '新用户消费金额')

    # Drop the two helper columns that are not part of the report.
    del df_form['总用户量']
    del df_form['总消费']

    print('\n第二个表运行完毕……')
    return df_form
# -*- coding: utf-8 -*-
# author:Super.Shen

import pandas as pd

# Print wide frames without wrapping and allow up to 1000 rows in the output.
pd.set_option('expand_frame_repr', False)
pd.set_option('display.max_rows', 1000)
import warnings

# Silence every warning for the rest of the script.
warnings.filterwarnings('ignore')

from build.database import url11, date, url77
from build.Func import or_path, gb

# Export the data: write both query results to Excel files.
# NOTE(review): or_path presumably maps a name to a workbook path — confirm in build.Func.
date(url77).to_excel(or_path('注册充值用户2'))
date(url11).to_excel(or_path('变动日志2'))
# exit()

# Read back the newly registered users who recharged.
df_reg = pd.read_excel(or_path('注册充值用户2'))
# NOTE(review): gb looks like a group-by on '用户id' aggregating '充值金额' — confirm in build.Func.
df_reg = gb(df_reg, '用户id', '充值金额')
df_reg.rename(columns={'用户id': '用户ID'}, inplace=True)

# Read back the change log.

df = pd.read_excel(or_path('变动日志2'))

# Data analysis: convert each change time to a pandas timestamp, one value at a time.
df['变动时间'] = df['变动时间'].apply(lambda x: pd.to_datetime(x))
# -*- coding: utf-8 -*-
# author:Super.Shen

import pandas as pd
from Func import day
import warnings
from build.database import date, url4, yesterday

# Silence every warning for the rest of the script.
warnings.filterwarnings('ignore')

from Func import append_excel

# Export the rows returned by date(url4) to a workbook named after `yesterday`.
# The paths below are raw strings: they hold exactly the same characters as
# before, but no longer rely on invalid escape sequences such as "\A" or
# "\D", which newer Python versions warn about.
date(url4).to_excel(
    r'C:\Users\Administrator\Desktop\奇奇乐付费新用户留存统计\充值数据\{}.xls'.format(
        yesterday),
    index=False)

# Read the daily login users and combine them.
# NOTE(review): append_excel presumably concatenates every workbook in the
# given folder — confirm in Func.
df = append_excel(r'C:\Users\Administrator\Desktop\奇奇乐付费新用户留存统计\登入数据')

df_cz = append_excel(r'C:\Users\Administrator\Desktop\奇奇乐付费新用户留存统计\充值数据')
df_zc = append_excel(r'C:\Users\Administrator\Desktop\奇奇乐付费新用户留存统计\注册数据')

# Recharge table: build the de-duplication key.
# 'time' is the payment timestamp cut at its first space and parsed as a date;
# 'on' joins that date and the player id as "<time>|<player_id>".
df_cz['time'] = df_cz['pay_time'].apply(
    lambda x: pd.to_datetime(str(x).split(' ')[0]))
df_cz['on'] = df_cz['time'].apply(
    lambda x: str(x)) + '|' + df_cz['player_id'].apply(lambda x: str(x))

# De-duplicate the recharge table: keep the first row per (day, player).
df_cz = df_cz.drop_duplicates(subset=['on'], keep='first')
Exemplo n.º 6
0
def fx(day):
    """Write the recharge, registration and login workbooks for ``day``.

    ``lz_3lc(day)`` supplies three sources; each is loaded with ``date`` and
    written under ``path`` as '<folder>\\<day>.xlsx' without the index
    column.  The registration and login frames are passed through
    ``Register`` and ``Login`` respectively before being written.
    """
    recharge_src, register_src, login_src = lz_3lc(day)

    # Recharge data is written exactly as loaded.
    recharge = date(recharge_src)
    recharge.to_excel(path + '充值数据\\{}.xlsx'.format(day), index=False)

    # Registration data.
    registered = Register(date(register_src))
    registered.to_excel(path + '注册数据\\{}.xlsx'.format(day), index=False)

    # Login data.
    logins = Login(date(login_src))
    logins.to_excel(path + '登入数据\\{}.xlsx'.format(day), index=False)
Exemplo n.º 7
0
# Print wide frames without wrapping and allow up to 1000 rows in the output.
pd.set_option('expand_frame_repr', False)
pd.set_option('display.max_rows', 1000)
import warnings

# Silence every warning for the rest of the script.
warnings.filterwarnings('ignore')
import datetime

today = datetime.date.today()
# Whole days elapsed since 2018-11-14, taken from the timedelta directly
# instead of parsing the number out of its string form.
day_cut = (today - datetime.date(2018, 11, 14)).days

# Fetch one frame per day, oldest first, from `day_cut` days ago up to
# yesterday, tagging every row with the day it was fetched for.
_daily_frames = []
for x in range(day_cut, 0, -1):
    day = today - datetime.timedelta(days=x)
    df = date(lc_url(day))
    df['time'] = day
    _daily_frames.append(df)
    print('{}号抓取完毕!'.format(day))

# DataFrame.append was removed in pandas 2.0; concatenate the collected
# frames once.  With no days to fetch the result is an empty frame, as before.
df_all = pd.concat(_daily_frames) if _daily_frames else pd.DataFrame()

df_all.reset_index(inplace=True)
df_all = df_all[['time', 'counts', 'day1', 'day3', 'day7', 'day14', 'day30']]

# For testing only
# df_all.to_excel(or_path('TTT'))
# df_all = pd.read_excel(or_path('TTT'))


def more(form, col):
    form[col] = form[col].apply(lambda x: str(int(x))) + '(' + (
        form[col] /
# -*- coding: utf-8 -*-
# author:Super.Shen

import pandas as pd
from Func import day
import warnings
from build.database import date, url4, yesterday, url5, Register, url6, Login

# Silence every warning for the rest of the script.
warnings.filterwarnings('ignore')

from Func import append_excel

# Export three workbooks named after `yesterday`: registration, login and
# recharge data.  The paths are raw strings: they hold exactly the same
# characters as before, but no longer rely on invalid escape sequences such
# as "\A" or "\D", which newer Python versions warn about.
Register(date(url5)).to_excel(
    r'C:\Users\Administrator\Desktop\奇奇乐付费新用户留存1\注册数据\{}.xlsx'.format(
        yesterday),
    index=False)

Login(date(url6)).to_excel(
    r'C:\Users\Administrator\Desktop\奇奇乐付费新用户留存1\登入数据\{}.xlsx'.format(
        yesterday),
    index=False)

date(url4).to_excel(
    r'C:\Users\Administrator\Desktop\奇奇乐付费新用户留存1\充值数据\{}.xlsx'.format(
        yesterday),
    index=False)

# Read the daily login users and combine them.
# NOTE(review): append_excel presumably concatenates every workbook in the
# given folder — confirm in Func.
df = append_excel(r'C:\Users\Administrator\Desktop\奇奇乐付费新用户留存1\登入数据')
df_cz = append_excel(r'C:\Users\Administrator\Desktop\奇奇乐付费新用户留存1\充值数据')
df_zc = append_excel(r'C:\Users\Administrator\Desktop\奇奇乐付费新用户留存1\注册数据')
Exemplo n.º 9
0
from run4 import run4
from run5 import run5
from run6 import run6
from run7 import run7, run8, run9

# Run every report step; run3 yields three frames and run4 yields two.
df1 = run1()
df2 = run2()
df3, df_3, df_3_3 = run3()
df4, df_4 = run4()
df5 = run5()
df6 = run6()
df7 = run7()
# df8 = run8()
df9 = run9()

# Two views computed from the same source, selected by the second argument.
# NOTE(review): the meaning of modes 1 and 2 is defined in huishou — confirm there.
df_hb = huishou(date(url3), 1)
df_yl = huishou(date(url3), 2)

# Export the data.
# Raw string: the same path as before, without the invalid "\A" and "\D"
# escape sequences that newer Python versions warn about.
writer = pd.ExcelWriter(
    r'C:\Users\Administrator\Desktop\run_奇奇乐_{}.xlsx'.format(yesterday))

df9.to_excel(writer, sheet_name='渠道', index=False)

df1.to_excel(writer, sheet_name='充值支付类型占比', index=False)
df2.to_excel(writer, sheet_name='新注册其次占比', index=False)

# Only the last two rows of each coin table go into the workbook.
df3.tail(2).to_excel(writer, sheet_name='金币产出', index=False)
df_3.tail(2).to_excel(writer, sheet_name='金币消耗', index=False)
df_3_3.tail(2).to_excel(writer, sheet_name='金币系统赠送', index=False)
Exemplo n.º 10
0
def run2():
    """Summarise new-user payments on ``bef_yesterday`` and their follow-up.

    Payment rows come from ``date(url22)`` and the list of newly registered
    users from ``date(url88)``.  Payers found in the registration list are
    flagged 'new'; every other payer is flagged 'old'.

    Returns:
        pandas.DataFrame: a single-row frame with the report date, the count
        and spend of flagged-new payers on ``bef_yesterday`` with their share
        of that day's totals, and the count and spend of flagged-new payers
        on ``yesterday`` relative to the ``bef_yesterday`` new-user figures.
        Shares are formatted as 'xx.xx%' strings.  The helper columns
        '总用户量' and '总消费' are removed before returning.
    """
    # Load the data.
    payments = date(url22)
    registered = date(url88)

    # Reduce every payment timestamp to its calendar day.
    payments['day'] = payments['pay_time'].apply(
        lambda x: pd.to_datetime(str(x).split(' ')[0]))

    # Payments made yesterday (follow-up day) and the day before (base day).
    # An earlier variant shifted the window one day back, using
    # bef_yesterday / bb_yesterday instead.
    next_day = payments[payments['day'] == pd.to_datetime(yesterday)]
    base_day = payments[payments['day'] == pd.to_datetime(bef_yesterday)]

    # Prepare the registration list as a player_id -> 'new' lookup.
    registered['Flag'] = 'new'
    registered.rename(columns={'用户id': 'player_id'}, inplace=True)
    # De-duplicate so a user listed more than once cannot multiply payment
    # rows (and therefore amounts) in the merges below.
    registered = registered[['player_id', 'Flag']].drop_duplicates('player_id')

    # Assign the filled column back instead of calling fillna(inplace=True)
    # on a column selection, which does not update the frame under pandas
    # Copy-on-Write.
    base_day = pd.merge(left=base_day, right=registered, on='player_id',
                        how='left')
    base_day['Flag'] = base_day['Flag'].fillna('old')

    next_day = pd.merge(left=next_day, right=registered, on='player_id',
                        how='left')
    next_day['Flag'] = next_day['Flag'].fillna('old')

    base_new = base_day[base_day['Flag'] == 'new']
    next_new = next_day[next_day['Flag'] == 'new']

    i = 0
    df_form = pd.DataFrame()

    def share(numerator_col, denominator_col):
        # Format numerator / denominator of the report row as a percentage.
        ratio = df_form.loc[i, numerator_col] / df_form.loc[i, denominator_col]
        return '%.2f%%' % (ratio * 100)

    df_form.loc[i, '日期'] = '{}'.format(bef_yesterday)

    # Head counts.
    df_form.loc[i, '新用户量'] = len(base_new['player_id'].unique())
    df_form.loc[i, '总用户量'] = len(base_day['player_id'].unique())
    df_form.loc[i, '新用户占比'] = share('新用户量', '总用户量')

    # Amounts spent.
    df_form.loc[i, '新用户消费金额'] = base_new['amount'].sum()
    df_form.loc[i, '总消费'] = base_day['amount'].sum()
    df_form.loc[i, '新用户消费占比'] = share('新用户消费金额', '总消费')

    # Flagged-new payers on the following day.
    df_form.loc[i, '次日再消费用户量'] = len(next_new['player_id'].unique())
    df_form.loc[i, '次日再消费人数比'] = share('次日再消费用户量', '新用户量')

    # Amount spent by flagged-new payers on the following day.
    df_form.loc[i, '次日再消费金额'] = next_new['amount'].sum()
    df_form.loc[i, '次日再消费金额比'] = share('次日再消费金额', '新用户消费金额')

    # Drop the two helper columns that are not part of the report.
    del df_form['总用户量']
    del df_form['总消费']

    print('\n第二个表运行完毕……')
    return df_form