def ReadVersionData(dimension):
    """Load the most recent weekly version workbook and return it sorted.

    The workbook is located with ``settings.GetRecentFile`` under
    ``<settings.sourcePath>/Version_Base`` using the pattern
    ``'VGC_Version_Weekly'``; only the columns up to and including
    ``ROLL_MIX`` are kept.

    Args:
        dimension: Iterable of column names describing the reporting
            dimension. When it contains ``'FUELTYPE'`` the ``ROLL_MIX``
            column is re-normalised so that it sums to 1 within every
            ``(*dimension, 'PRICE_DATE')`` group.

    Returns:
        The data frame sorted by brand, model group, report model name,
        sub-model id, version id and price date.
    """
    file = settings.GetRecentFile(
        os.path.join(settings.sourcePath, 'Version_Base'),
        'VGC_Version_Weekly')
    ways_data = pd.read_excel(file).loc[:, :'ROLL_MIX']

    if 'FUELTYPE' in dimension:
        # Only rows with a positive TP take part in the re-normalisation; the
        # index-aligned assignment leaves every other row's ROLL_MIX as NaN.
        # ``transform`` keeps the original row index on every pandas version,
        # whereas ``groupby.apply`` prepends the group keys to the index for
        # this kind of function from pandas 2.0 on, which breaks the
        # assignment.
        ways_data['ROLL_MIX'] = (
            ways_data.query('TP > 0')
            .groupby([*dimension, 'PRICE_DATE'])['ROLL_MIX']
            .transform(lambda x: x / x.sum()))

    # NOTE(review): the derivations below were already disabled in the
    # original code; they are retained verbatim until confirmed unused.
    # ways_data['MIX'] = \
    #     ways_data.groupby(['SUB_MODEL_ID', 'PRICE_DATE'])['MIX'].apply(lambda x: x / x.sum())
    # ways_data['ROLL_MIX'] = \
    #     ways_data.query('TP > 0').groupby([*dimension, 'PRICE_DATE'])['ROLL_SALES'].apply(lambda x: x / x.sum())
    # ways_data['DISCOUNTR'] = \
    #     ways_data.query('TP > 0').apply(lambda x: x['TP'] / x['MSRP'] - 1 if x['MSRP'] > 0 else None, axis=1)
    # ways_data['SUB_MODEL_MIX_SALES'] = \
    #     ways_data.apply(lambda x: x['SUB_MODEL_SALES'] * x['MIX'] if pd.notnull(x['SUB_MODEL_SALES']) and pd.notnull(x['MIX']) else None, axis=1)
    # ways_data['DIMENSION']= \
    #     ways_data[ShowDimension(dimension)].apply(lambda x: ' '.join(x), axis=1)

    return ways_data.sort_values(by=[
        'BRAND', 'MODELGROUP', '报告车型名称', 'SUB_MODEL_ID', 'VERSION_ID',
        'PRICE_DATE'
    ])
def ReadVersionData(dimension):
    """Load the weekly version workbook and overlay halt dates from ``rule_data``.

    The workbook is located with ``settings.GetRecentFile`` under
    ``<settings.sourcePath>/Version_Base`` using the pattern
    ``'VGC_Version_Weekly'``; only the columns up to and including
    ``ROLL_MIX`` are kept.

    ``HALT_PRODUCT_DATE`` and ``HALT_SALE_DATE`` are taken from
    ``sql.rule_data`` (matched on ``VERSION_ID``); where ``rule_data`` has no
    value, the value already present in the workbook (if any) is kept.

    Args:
        dimension: Iterable of column names describing the reporting
            dimension. When it contains ``'FUELTYPE'`` the ``ROLL_MIX``
            column is re-normalised so that it sums to 1 within every
            ``(*dimension, 'PRICE_DATE')`` group.

    Returns:
        The data frame sorted by price date, brand, report model name and
        version id.
    """
    # Imported inside the function, exactly as the original did.
    from sql import rule_data

    file = settings.GetRecentFile(
        os.path.join(settings.sourcePath, 'Version_Base'),
        'VGC_Version_Weekly')
    # A fresh 0..n-1 index is required: the halt-date write-back below aligns
    # the merge result with ``ways_data`` purely by row index.
    ways_data = pd.read_excel(file).loc[:, :'ROLL_MIX'].reset_index(drop=True)

    halt_columns = ['HALT_PRODUCT_DATE', 'HALT_SALE_DATE']
    # One row per VERSION_ID (first occurrence wins). A duplicated VERSION_ID
    # in rule_data would make the left merge emit extra rows and silently
    # shift every following row in the index-aligned assignment.
    halt_lookup = rule_data[['VERSION_ID', *halt_columns]] \
        .drop_duplicates(subset='VERSION_ID')
    # suffixes=(' ', ''): on a name clash the workbook's columns get a
    # trailing space, so the un-suffixed names selected below are always the
    # rule_data values.
    merged = ways_data.merge(
        halt_lookup, on='VERSION_ID', how='left', suffixes=(' ', ''))
    ways_data[halt_columns] = merged[halt_columns].fillna(ways_data)

    if 'FUELTYPE' in dimension:
        # Only rows with a positive TP take part in the re-normalisation; the
        # index-aligned assignment leaves every other row's ROLL_MIX as NaN.
        # ``transform`` keeps the original row index on every pandas version,
        # whereas ``groupby.apply`` prepends the group keys to the index for
        # this kind of function from pandas 2.0 on, which breaks the
        # assignment.
        ways_data['ROLL_MIX'] = (
            ways_data.query('TP > 0')
            .groupby([*dimension, 'PRICE_DATE'])['ROLL_MIX']
            .transform(lambda x: x / x.sum()))

    return ways_data.sort_values(
        by=['PRICE_DATE', 'BRAND', '报告车型名称', 'VERSION_ID'])
def _mix_weighted_sum(group, column):
    """Return the ROLL_MIX-weighted sum of ``column`` for one group.

    Yields ``None`` when the column has no non-null value or when the
    group's ROLL_MIX weights sum to zero.
    """
    if group[column].notna().any() and group['ROLL_MIX'].sum():
        return (group[column] * group['ROLL_MIX']).sum()
    return None


def _summarise_dimension_group(group):
    """Collapse one (dimension, YM_ID, PRICE_DATE) group into a single row."""
    dimension_sales = group['SUB_MODEL_MIX_SALES'].sum()
    return pd.Series({
        'MSRP': _mix_weighted_sum(group, 'MSRP'),
        'TP': _mix_weighted_sum(group, 'TP'),
        'DISCOUNTR': _mix_weighted_sum(group, 'DISCOUNTR'),
        # A zero total is reported as missing rather than as 0.
        'DIMENSION_SALES': dimension_sales if dimension_sales else None,
    })


def CalculateDimensionData(ways_data, dimension):
    """Aggregate version-level rows to the requested reporting dimension.

    Steps, in order:

    1. Group by ``(*dimension, 'YM_ID', 'PRICE_DATE')`` and compute the
       ROLL_MIX-weighted ``MSRP``, ``TP`` and ``DISCOUNTR`` plus the summed
       ``SUB_MODEL_MIX_SALES`` (as ``DIMENSION_SALES``).
    2. Add ``MSRP_INDEX``: MSRP divided by the previous MSRP of the same
       dimension, ordered by ``PRICE_DATE``.
    3. Insert a ``DIMENSION`` label column (space-joined values of the
       columns returned by ``ShowDimension(dimension)``) before ``YM_ID``.
    4. Add ``ROLL_DIMENSION_SALES``: per ``DIMENSION``, the rolling sum of
       ``DIMENSION_SALES`` over up to six rows ordered by ``YM_ID``, shifted
       by one row so the current row is excluded.
    5. Override ``ROLL_DIMENSION_SALES`` with the values from the
       ``ROLL_DIMENSION_SALES`` sheet of the most recent file matching
       ``'周报'`` under ``<settings.sourcePath>/Modify``, falling back to the
       computed value where that sheet has no entry.
    6. Pass the result through ``ShiftDimensionSales``.

    Args:
        ways_data: Version-level data frame; must contain the dimension
            columns plus ``YM_ID``, ``PRICE_DATE``, ``MSRP``, ``TP``,
            ``DISCOUNTR``, ``ROLL_MIX`` and ``SUB_MODEL_MIX_SALES``.
        dimension: List of column names to aggregate by.

    Returns:
        The aggregated frame sorted by price date, brand and report model
        name.
    """
    vgc_data = (
        ways_data.groupby([*dimension, 'YM_ID', 'PRICE_DATE'])
        .apply(_summarise_dimension_group)
        .reset_index())

    # ``transform`` keeps the caller's row index on every pandas version;
    # ``groupby.apply`` prepends the group keys for this kind of function
    # from pandas 2.0 on, which would break the column assignment.
    # NOTE(review): ``.replace([inf,])`` is called without a replacement
    # value. On pandas versions that accept this it pads matches with the
    # preceding entry instead of substituting a constant, and the form is
    # deprecated in recent releases. Confirm the intended replacement before
    # changing it; it is kept exactly as written.
    vgc_data['MSRP_INDEX'] = (
        vgc_data.sort_values('PRICE_DATE')
        .groupby(dimension)['MSRP']
        .transform(lambda x: x.div(x.shift(1)))
        .replace([inf, ]))

    vgc_data.insert(
        column='DIMENSION',
        loc=list(vgc_data).index('YM_ID'),
        value=vgc_data[ShowDimension(dimension)].apply(
            lambda x: ' '.join(x), axis=1))

    # One row per (DIMENSION, YM_ID) so that each month is counted once in
    # the rolling window even when it has several price dates.
    roll_data = vgc_data[['DIMENSION', 'YM_ID', 'DIMENSION_SALES']] \
        .drop_duplicates(subset=['DIMENSION', 'YM_ID'])
    roll_data['ROLL_DIMENSION_SALES'] = (
        roll_data.sort_values('YM_ID')
        .groupby(['DIMENSION'])['DIMENSION_SALES']
        .transform(
            lambda x: x.rolling(window=6, min_periods=1).sum().shift(1)))
    vgc_data = vgc_data.merge(
        roll_data.drop('DIMENSION_SALES', axis=1),
        on=['DIMENSION', 'YM_ID'],
        how='left')

    modify_file = settings.GetRecentFile(
        os.path.join(settings.sourcePath, 'Modify'), '周报')
    # Keep only the last override per (DIMENSION, PRICE_DATE); unique keys
    # also guarantee the left merge below cannot add rows.
    sales_modify = (
        pd.read_excel(modify_file, sheet_name='ROLL_DIMENSION_SALES')
        .filter(['DIMENSION', 'PRICE_DATE', 'ROLL_DIMENSION_SALES'], axis=1)
        .drop_duplicates(subset=['DIMENSION', 'PRICE_DATE'], keep='last'))

    # Drop the computed ROLL_DIMENSION_SALES (the last column), merge in the
    # manual overrides, then fill every gap from the pre-merge frame. The
    # fill is aligned by row index, hence the reset beforehand.
    vgc_data = vgc_data.reset_index(drop=True)
    vgc_data = (
        vgc_data.iloc[:, :-1]
        .merge(sales_modify, on=list(sales_modify)[:-1], how='left')
        .fillna(vgc_data))

    vgc_data = ShiftDimensionSales(vgc_data, dimension)
    return vgc_data.sort_values(by=['PRICE_DATE', 'BRAND', '报告车型名称'])
if __name__ == "__main__":
    # Script entry point: ask which numbered report to build, then run the
    # matching handler against the most recent source file of that report.
    ROOT = tk.Tk()
    ROOT.withdraw()  # only the dialog is wanted, not the empty root window

    # Map the leading number of every entry in the source directory to the
    # entry name. Entries without a leading number are skipped (previously
    # they raised AttributeError on ``None.group()``).
    file_dict = {
        int(number.group()): name
        for name, number in (
            (entry, re.match(r'\d+', entry))
            for entry in os.listdir(settings.sourcePath))
        if number
    }

    USER_INPUT = simpledialog.askinteger(
        title="选择报告",
        prompt='\n'.join(
            [f'{key} - {value}' for key, value in file_dict.items()]))
    if not USER_INPUT:
        # Dialog cancelled (or 0 entered): nothing to do.
        raise SystemExit(0)
    if USER_INPUT not in file_dict:
        # Previously this fell through to os.path.join(..., None) and died
        # with a TypeError.
        raise SystemExit(f'No report is numbered {USER_INPUT}.')

    sourceFileName = settings.GetRecentFile(
        os.path.join(settings.sourcePath, file_dict[USER_INPUT]))

    app = xw.App(add_book=False, visible=settings.isVisible)
    app.display_alerts = False

    if USER_INPUT == 2:
        f2(sourceFileName, '原始数据')
    elif USER_INPUT == 3:
        f3(sourceFileName, '数据源')
    elif USER_INPUT == 5:
        f5(sourceFileName, '非百强集团旗下4S店清单(数据源)')
    elif USER_INPUT == 6:
        f6(sourceFileName, '数据源')

    # Show Excel once processing is finished.
    app.visible = 1
    raise SystemExit(0)
def OpenExcel():
    """Start Excel and open the most recent ``.xlsm`` workbook.

    Publishes two module-level globals:

    * ``app`` - a new xlwings application, created without a blank workbook,
      with visibility taken from ``settings.isVisible`` and alerts disabled.
    * ``wb``  - the workbook returned by
      ``settings.GetRecentFile(settings.sourcePath, '.xlsm')``, opened in
      that application.
    """
    global app, wb

    app = xw.App(visible=settings.isVisible, add_book=False)
    app.display_alerts = False

    macro_workbook_path = settings.GetRecentFile(settings.sourcePath, '.xlsm')
    wb = app.books.open(macro_workbook_path)
import settings import province_process import pandas as pd import xlwings as xw import win32com.client as com from win32com.client import constants as c import re import os import warnings warnings.filterwarnings('ignore') app = xw.App(add_book=False, visible=settings.isVisible) app.display_alerts = False # app.screen_updating = False template_wb = app.books.open(settings.GetRecentFile(settings.origPath, '省级地图')) province = template_wb.sheets['省份'].range(1, 1).options(pd.DataFrame, index=False, expand='table').value source_wb = app.books.open(settings.GetRecentFile(settings.sourcePath, '.xlsx')) data = source_wb.sheets[0].range(1, 1).options(pd.DataFrame, index=False, expand='table').value.merge( province, left_on='目标城市', right_on='PROVINCE_NAME', how='left', indicator=True) if 'left_only' in data._merge.values:
] data = GetSummaryData(segment, sheet_name, index_order) UpdateShapeChart(data, chart) logger.info( f"Done - {slide.SlideIndex:02} {slide.Shapes('Title_').TextFrame.TextRange.Text}" ) del slide if __name__ == "__main__": logger = settings.logger logger.info(f"\n开始更新...") reportFileName = settings.GetRecentFile(settings.reportPath, '价格分析报告补充') App = com.gencache.EnsureDispatch('Powerpoint.Application') ppt = App.Presentations.Open(reportFileName, WithWindow=settings.isVisible) App.WindowState = 2 # 窗口最小化 process.OpenExcel() try: main(ppt) except Exception as e: logger.info(e) logger.info(f"中断更新.") finally: resultFileName = re.sub( '(\d+年\d+月)(.*?)(\d+.)', '{}\g<2>{}.'.format(settings.report_date.strftime('%Y年%#m月'),
def GetPriceIndexData(dimension):
    """Return sales-weighted TP and discount rate per ``dimension`` group.

    Reads the module-level ``source_price_index_data``. When ``dimension``
    contains ``'TOTAL'`` all rows are used; otherwise only rows whose
    ``CUST_SEGMENT_NAME_CHN`` is in the module-level ``segment_list``.

    Args:
        dimension: List of column names to group by. The synthetic column
            ``TOTAL`` (constant ``'整体'``) is available for an overall row.

    Returns:
        A data frame indexed by ``dimension`` with the columns ``TP``
        (``AVG_TP`` weighted by ``SALES_QTY``) and ``DISCOUNT_RATE``
        (``(MSRP - TP) / MSRP`` using the equally weighted MSRP).
    """
    if 'TOTAL' in dimension:
        tmp_data = source_price_index_data
    else:
        in_segment = source_price_index_data.CUST_SEGMENT_NAME_CHN.isin(
            segment_list)
        tmp_data = source_price_index_data[in_segment]

    tmp_data = (
        tmp_data.assign(TOTAL='整体')
        .groupby(dimension)
        .apply(lambda x: pd.Series({
            'MSRP': (x.MSRP * x.SALES_QTY / x.SALES_QTY.sum()).sum(),
            'TP': (x.AVG_TP * x.SALES_QTY / x.SALES_QTY.sum()).sum(),
        })))
    # Column arithmetic instead of a row-by-row apply: same values.
    tmp_data['DISCOUNT_RATE'] = (
        (tmp_data['MSRP'] - tmp_data['TP']) / tmp_data['MSRP'])
    return tmp_data.drop('MSRP', axis=1)


db = common.OracleDB('DBBG')
try:
    # The SQL text contains the placeholder SPECIFICPRICE_DATE, which is
    # substituted with the configured price date before execution.
    query = settings.ReadFile(settings.GetRecentFile(settings.sqlPath)) \
        .replace('SPECIFICPRICE_DATE', str(settings.price_date))
    source_price_index_data = db.query_data(query) \
        .query("PRICE_DATE == {}".format(settings.price_date))

    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent and keeps both frames' indexes unchanged.
    price_index_data = pd.concat([
        GetPriceIndexData(['TOTAL']),
        GetPriceIndexData(['CUST_SEGMENT_NAME_CHN']),
    ])
    print(price_index_data.shape)

    # 'PEICE_SEGMENT' is the column name as delivered by the procedure;
    # the spelling must not be "corrected" here.
    price_level_data = (
        db.query_data_from_procedure(
            'pkg_report_changan.proc_price_level', [settings.price_date])
        .drop('PRICE_DATE', axis=1)
        .set_index(['CUST_SEGMENT_NAME_CHN', 'PEICE_SEGMENT'])
        .unstack('PEICE_SEGMENT')
        .droplevel(level=0, axis=1))
    print(price_level_data.shape)

    manf_price_data = (
        db.query_data_from_procedure(
            'pkg_report_changan.proc_manf_wt_tp', [settings.price_date])
        .drop('PRICE_DATE', axis=1)
        .set_index('MANF_NAME')
        .rename(columns={'WEIGHT_TP': 'TP',
                         'WEIGHT_DISCOUNT': 'DISCOUNT_RATE'}))
    print(manf_price_data.shape)
finally:
    # Release the connection even when a query or a transformation fails.
    db.close()
import settings
from process import TransformData, AdjustSheetFormat
from win32com.client import constants as c
import pandas as pd
import xlwings as xw
import os
import warnings

warnings.filterwarnings('ignore')

sourceFileName = settings.GetRecentFile(settings.sourcePath)
app = xw.App(add_book=False, visible=settings.isVisible)
app.display_alerts = False
wb = app.books.open(sourceFileName)

# Read the first sheet as a table starting at A1, then keep rows from the
# last report month onwards that have positive rolling average sales.
data = (
    wb.sheets[0].range(1, 1)
    .options(pd.DataFrame, index=False, expand='table').value
    .query(f'YM_ID >= {settings.last_report_date//100}')
    .query('ROLL_AVG_SALES > 0')
    .reset_index(drop=True))

# Keep only versions that have a TP in every remaining month (the original
# note says "both months" - presumably exactly two months survive the
# YM_ID filter; verify), then re-normalise the mix.
filter_index = (
    data[['VERSION_ID', 'YM_ID', 'TP']]
    .set_index(['VERSION_ID', 'YM_ID'])
    .unstack('YM_ID')
    .notna()
    .all(axis=1))
data = data[data.VERSION_ID.isin(
    filter_index[filter_index].index.values)].reset_index(drop=True)

# Each version's share of ROLL_AVG_SALES within its (DIMENSION, YM_ID) group.
# ``transform`` keeps the row index on every pandas version (``groupby.apply``
# prepends the group keys for this kind of function from pandas 2.0 on), and
# item assignment always writes a column, whereas ``data.ROLL_MIX = ...``
# only sets a Python attribute when the column does not exist yet.
data['ROLL_MIX'] = data.groupby(
    ['DIMENSION', 'YM_ID'])['ROLL_AVG_SALES'].transform(lambda x: x / x.sum())