def read_campaign_retention():
    """Aggregate SA/PH campaign retention from CSV and export to Excel.

    Reads resource/campaign_retention/campaign_retention.csv, sums the
    retention columns per date for campaigns whose name contains '_SA_'
    and '_ph_' respectively, tags each group with a country code, and
    writes the combined result to sum_retention.xlsx (sheet 'retention').
    """
    path_manager = EnvManager()
    resource_path = path_manager.get_resource_dir()
    source_dir = ospath.join(resource_path, 'campaign_retention')
    source_file = ospath.join(source_dir, 'campaign_retention.csv')
    source_data = pd.read_csv(
        source_file, usecols=['Date', 'Campaign', 'Install Day', 'Day 1'])
    sa_sum = _sum_country_retention(source_data, '_SA_', 'SA')
    ph_sum = _sum_country_retention(source_data, '_ph_', 'PH')
    result_data = pd.concat([sa_sum, ph_sum])
    # NOTE(review): output lands in the current working directory, unlike
    # the other exporters that use EnvManager.get_output_dir() — confirm
    # this is intentional.
    writer = pd.ExcelWriter('sum_retention.xlsx')
    result_data.to_excel(writer, 'retention')
    writer.save()


def _sum_country_retention(source_data, sub_string, country_code):
    """Sum retention per date for campaigns whose name contains *sub_string*.

    Returns a DataFrame grouped by 'Date'; a 'country' column holding
    *country_code* is added only when the group is non-empty (matching the
    original behavior).
    """
    mask = source_data['Campaign'].apply(isContain, sub_string=sub_string)
    summed = source_data[mask].groupby('Date').sum()
    row_num = len(summed.index)
    if row_num > 0:
        summed['country'] = pd.Series([country_code] * row_num,
                                      index=summed.index)
    return summed
def read_ad_data(account_id, campaign_id):
    """Export grouped ad-level insight data for one account/campaign.

    Reads every insight file under the account's ad-insight directory,
    aggregates it per day for *campaign_id*, groups by ad name, and writes
    a timestamped .xlsx into the ad output directory.
    """
    env = EnvManager()
    ad_type = mconstant.NODE_TYPE_AD
    account_dir = os.path.join(env.get_insight_dir(ad_type), account_id)
    output_path = os.path.join(env.get_output_dir(), ad_type)
    fhelper.make_dir(output_path)
    insight_handler = HandlerFactory(env.get_conf_dir()).get_insight_handler(
        ad_type)
    insight_files = fhelper.get_file_list(account_dir)
    data_per_day = read_ad_insight_day(insight_files, insight_handler,
                                       campaign_id)
    # Manual filter knob; presumably an empty key means "all ads" —
    # verify against group_by_name.
    ad_name_key = ''
    group_data = group_by_name(data_per_day, ad_name_key)
    timestamp = str(time.time())
    if ad_name_key.strip():
        act_file = os.path.join(
            output_path, account_id + '_' + ad_name_key + '_' + timestamp + '.xlsx')
        ExcelExporter.export_data_excel(group_data, act_file)
    else:
        act_file = os.path.join(output_path,
                                account_id + '_' + timestamp + '.xlsx')
        ExcelExporter.export_excel_by_key(iconstant.NEW_FIELD_GROUP_AD_NAME,
                                          iconstant.NEW_FIELD_GROUP_AD_NAME,
                                          group_data, act_file)
def __init__(self, account_id, handler_factory, start_date='', end_date='', node_types=None, insight_path=''):
    """Cache all insight data for *account_id* across the node types.

    An empty *end_date* defaults to today; *node_types* defaults to
    campaign/adset/ad.
    """
    self._path_manager = EnvManager()
    self._handler_factory = handler_factory
    self._current_act_id = account_id
    self._all_data = {}
    self._all_handler = {}
    self._start_date = start_date
    self._end_date = end_date if end_date else mhelper.get_now_date()
    default_types = [
        mconstant.NODE_TYPE_CAMPAIGN, mconstant.NODE_TYPE_ADSET,
        mconstant.NODE_TYPE_AD
    ]
    self._node_type = node_types if node_types else default_types
    self._insight_path = insight_path
    self._read_all_data()
def __init__(self):
    """Pull logging settings from the environment and configure the logger."""
    env_manager = EnvManager()
    # Fall back to this module's directory when no log path is configured.
    self.log_path = env_manager.get_log_path() or ospath.dirname(__file__)
    self.logger_name = env_manager.get_log_names()
    self._config_log()
def analysis_typany_retention():
    """Aggregate typany retention per (Date, Country) and export to Excel."""
    path_manager = EnvManager()
    retention_file = ospath.join(path_manager.get_resource_dir(), 'typany',
                                 'retention_typany.xlsx')
    retention_data = ReadExcelDataHandler.read_typany_retention(retention_file)
    pivot_data = retention_data.groupby(['Date', 'Country'],
                                        as_index=False).sum()
    output_file = ospath.join(path_manager.get_output_dir(), 'typany.xlsx')
    writer = pd.ExcelWriter(output_file)
    pivot_data.to_excel(writer, 'retention')
    writer.save()
def read_all_account_data(node_type):
    """Export one timestamped insight workbook per account for *node_type*."""
    path_manager = EnvManager()
    insight_dir = path_manager.get_insight_dir(node_type)
    output_path = os.path.join(path_manager.get_output_dir(), node_type)
    fhelper.make_dir(output_path)
    handler_factory = HandlerFactory(path_manager.get_conf_dir())
    insight_handler = handler_factory.get_insight_handler(node_type)
    for account_id in fhelper.get_subdir_name_list(insight_dir):
        account_files = fhelper.get_file_list(
            os.path.join(insight_dir, account_id))
        target_file = os.path.join(
            output_path, account_id + '_' + str(time.time()) + '.xlsx')
        read_insight_per_day(account_files, insight_handler, target_file,
                             node_type, account_id)
def merger_all_account_data(node_type):
    """Merge insight and retention files per account and export the result."""
    path_manager = EnvManager()
    insight_dir = path_manager.get_insight_dir(node_type)
    retention_dir = path_manager.get_retention_dir(node_type)
    merger_handler = MergerDataHandler(path_manager.get_conf_dir())
    output_path = os.path.join(path_manager.get_output_dir(), node_type)
    fhelper.make_dir(output_path)
    # Accounts are driven by the retention tree, not the insight tree.
    for account_id in fhelper.get_subdir_name_list(retention_dir):
        insight_files = fhelper.get_file_list(
            os.path.join(insight_dir, account_id))
        retention_files = fhelper.get_file_list(
            os.path.join(retention_dir, account_id))
        merged = merger_handler.merger_in_re_per_day(insight_files,
                                                     retention_files,
                                                     node_type)
        account_output = os.path.join(output_path, account_id)
        fhelper.make_dir(account_output)
        ExcelExporter.export_merger_retention(dhelper.get_key_id(node_type),
                                              dhelper.get_key_id(node_type),
                                              merged, account_output)
def export_spend_report(delta_days):
    """Build the fake-data spend report for the day *delta_days* from today.

    NOTE(review): reads from and writes to a hard-coded
    /var/www/html/eli/server/fakeProfitData tree rather than EnvManager's
    configured profit/output directories — confirm this override is wanted.
    """
    path_manager = EnvManager()
    latest_date = comhelper.get_delta_date(int(delta_days))
    data_dir = ospath.join("/var/www/html/eli/server/fakeProfitData",
                           latest_date)
    facebook_file = ospath.join(data_dir, 'facebook_data.xlsx')
    config_dir = ospath.join(path_manager.get_profit_handler_dir(),
                             'kpi_conf')
    af_handler = FakeDataHandler(config_dir, 'fake_super_conf.json')
    facebook_data = ReadExcelDataHandler.read_facebook_data(facebook_file)
    calculate_data = af_handler.calculate_income(facebook_data)
    pivot_data = af_handler.group_data_by_country(calculate_data)
    _save_data_to_excel(data_dir, calculate_data, pivot_data)
def export_spend_report(delta_days):
    """Merge AppsFlyer retention with Facebook spend and export the report.

    *delta_days* is currently unused in this variant; inputs come from the
    configured profit resource directory.
    """
    path_manager = EnvManager()
    profit_resource_dir = path_manager.get_profit_resource_dir()
    appsflyer_file = ospath.join(profit_resource_dir, 'retention_data.xlsx')
    facebook_file = ospath.join(profit_resource_dir, 'facebook_data.xlsx')
    config_dir = ospath.join(path_manager.get_profit_handler_dir(),
                             'kpi_conf')
    af_handler = AFDataHandler(config_dir, 'super_conf.json')
    output_dir = path_manager.get_output_dir()
    appsflyer_data = ReadExcelDataHandler.read_appflyer_data(appsflyer_file)
    facebook_data = ReadExcelDataHandler.read_facebook_data(facebook_file)
    merge_data = af_handler.merger_data(facebook_data, appsflyer_data)
    calculate_data = af_handler.calculate_income(merge_data)
    pivot_data = af_handler.group_data_by_country(calculate_data)
    _save_data_to_excel(output_dir, calculate_data, pivot_data)
def export_data_weekly():
    """Export weekly daily+hourly insight reports for each configured account.

    Reads insight_weekly_conf.json for the account map, node types and
    paths, then for every account writes one .xlsx (one sheet per node
    type / granularity) covering the window [today-8, today-2].
    """
    path_manager = EnvManager()
    conf_dir = path_manager.get_conf_dir()
    weekly_conf = ospath.join(conf_dir, 'insight_weekly_conf.json')
    conf_info = fhelper.read_json_file(weekly_conf)
    account_map = conf_info[conconstants.WEEKLY_ACCOUNT_ID]
    node_types = conf_info[conconstants.WEEKLY_NODE_TYPE]
    output_path = conf_info[conconstants.WEEKLY_OUTPUT_PATH]
    insight_path = conf_info[conconstants.WEEKLY_INSIGHT_PATH]
    # Empty config values fall back to the environment defaults.
    if not output_path:
        output_path = path_manager.get_output_dir()
    read_start_date = conf_info[conconstants.WEEKLY_READ_START_DATE]
    current_day = chelper.get_now_date()
    # Reports are grouped into a per-day subdirectory.
    output_path = ospath.join(output_path, current_day)
    fhelper.make_dir(output_path)
    # Read one extra day (-9) beyond the filter window (-8 .. -2) so the
    # reader has data on both boundaries.
    if not read_start_date:
        read_start_date = chelper.get_delta_date(-9)
    filter_start_date = chelper.get_delta_date(-8)
    filter_end_date = chelper.get_delta_date(-2)
    handler_factory = HandlerFactory(conf_dir)
    try:
        for (act_id, act_desc) in account_map.items():
            # The reader loads all data for the account at construction.
            reader = InsightReader(act_id, handler_factory, read_start_date,
                                   node_types=node_types,
                                   insight_path=insight_path)
            data_list = []
            sheet_name_list = []
            file_name = act_desc + '_' + act_id + '_' + filter_start_date + '_' + filter_end_date + '.xlsx'
            output_file = ospath.join(output_path, file_name)
            EliLogger.instance().info('Export account: ' + act_desc + '; outputPath: ' + output_file)
            print 'Export account: ' + act_desc + '; outputPath: ' + output_file
            for ntype in node_types:
                daily_data = reader.read_daily_data(ntype, filter_start_date,
                                                    filter_end_date)
                hourly_data = reader.read_hourly_data(ntype,
                                                      filter_start_date,
                                                      filter_end_date)
                # Only non-empty frames become sheets in the workbook.
                if daily_data is not None and not daily_data.empty:
                    data_list.append(daily_data)
                    daily_name = inconstants.SHEET_NAME_PREFIX_DAILY + '_' + ntype
                    sheet_name_list.append(daily_name)
                if hourly_data is not None and not hourly_data.empty:
                    data_list.append(hourly_data)
                    hourly_name = inconstants.SHEET_NAME_PREFIX_HOURLY + '_' + ntype
                    sheet_name_list.append(hourly_name)
                EliLogger.instance().info('Succeed to export node type: ' +
                                          ntype)
                print 'Succeed to export node type: ' + ntype
            if len(data_list) > 0:
                ExcelExporter.export_multi_data_excel(output_file, data_list,
                                                      sheet_name_list)
    # Boundary handler: log the failure instead of crashing the weekly job.
    except Exception, e:
        EliLogger.instance().error('Failed to export insight analysis. ' +
                                   e.message)
        print 'Failed to export insight analysis. ' + e.message
conversion_data['All conv.'] = conversion_data['All conv.'].apply(str.replace, args=(',', '')).apply(float) conversion_data['Cost'] = conversion_data['Cost'].apply(str.replace, args=(',', '')).apply(float) install_data = pd.read_excel(install_file, parse_cols="B, C") install_data['campaign'] = install_data['campaign'].apply(str) install_data.rename(columns={'campaign': 'Campaign ID', 'install': 'AF Install'}, inplace=True) merger_data = pd.merge(conversion_data, install_data, on='Campaign ID', how='outer') merger_data.fillna(0, inplace=True) merger_data['Other Conversion'] = merger_data['All conv.'] - merger_data['Conversions'] merger_data['Cost/AF'] = map(comhelper.division_operation, merger_data['Cost'], merger_data['AF Install']) merger_data['Cost/Coversion'] = map(comhelper.division_operation, merger_data['Cost'], merger_data['Conversions']) merger_data['Cost/All'] = map(comhelper.division_operation, merger_data['Cost'], merger_data['All conv.']) merger_data['Cost/Other'] = map(comhelper.division_operation, merger_data['Cost'], merger_data['Other Conversion']) export_data = merger_data[['Campaign ID', 'Campaign', 'AF Install', 'Conversions', 'All conv.', 'Other Conversion', 'Cost', 'Cost/AF', 'Cost/Coversion', 'Cost/All', 'Cost/Other']] date_str = comhelper.get_delta_date(-1) file_name = 'adword_conversion_' + date_str + '.xlsx' export_file = ospath.join(output_root_path, file_name) ExcelExporter.export_data_excel(export_data, export_file) if __name__ == '__main__': path_manager = EnvManager() resource_path = path_manager.get_resource_dir() conversion_path = ospath.join(resource_path, 'aw_conversion') output_path = path_manager.get_output_dir() export_conversion(conversion_path, output_path)
grouped_data = grouped_data.reset_index() grouped_data.rename(columns={ 'level_0': iconstant.FIELD_360_DATE, iconstant.INSIGHT_FIELD_ADSET_NAME: iconstant.FIELD_360_ADSET }, inplace=True) grouped_data[iconstant.FIELD_360_DATE] = grouped_data[ iconstant.FIELD_360_DATE].astype('datetime64') return grouped_data if __name__ == '__main__': path_manager = EnvManager() dir_360_retention = path_manager.get_360_retention_dir() file_list = fhelper.get_file_list(dir_360_retention) output_path = path_manager.get_output_dir() account_id = '1227059300703760' for advertiser_file in file_list: advertiser_data = get_360_retention_data(advertiser_file) insight_data = get_insight_data(account_id, path_manager) merger_data = pd.merge( advertiser_data, insight_data, how='left', on=[iconstant.FIELD_360_DATE, iconstant.FIELD_360_ADSET]) merger_data.drop( [iconstant.FIELD_360_CAMPAIGN, iconstant.INSIGHT_FIELD_ADSET_ID], axis=1,
class InsightReader:
    """Loads an account's cached insight files and serves filtered slices.

    All insight data for the requested node types is read once at
    construction time; ``read_daily_data`` and ``read_hourly_data`` then
    filter the cached frames by date range.
    """

    def __init__(self, account_id, handler_factory, start_date='', end_date='', node_types=None, insight_path=''):
        """Read all insight data for *account_id*.

        :param account_id: ad account whose files are loaded.
        :param handler_factory: factory producing per-node-type handlers.
        :param start_date: inclusive lower bound ('YYYY-MM-DD'); '' means
            no lower bound when filtering files.
        :param end_date: inclusive upper bound; defaults to today when empty.
        :param node_types: node types to load; defaults to campaign/adset/ad.
        :param insight_path: optional root overriding the default insight dir.
        """
        self._path_manager = EnvManager()
        self._handler_factory = handler_factory
        self._current_act_id = account_id
        self._all_data = {}
        self._all_handler = {}
        self._start_date = start_date
        self._end_date = end_date
        if not end_date:
            self._end_date = mhelper.get_now_date()
        if node_types:
            self._node_type = node_types
        else:
            self._node_type = [
                mconstant.NODE_TYPE_CAMPAIGN, mconstant.NODE_TYPE_ADSET,
                mconstant.NODE_TYPE_AD
            ]
        self._insight_path = insight_path
        self._read_all_data()

    def read_daily_data(self, node_type, start_date='', end_date=''):
        """Return per-day insight rows within [start_date, end_date].

        Returns None when *node_type* was not loaded.  Bounds are compared
        as strings against the frame's index; an empty *start_date*
        matches every row and an empty *end_date* falls back to the
        reader's configured end date.
        """
        if node_type not in self._all_handler:
            return None
        if node_type not in self._all_data:
            return None
        handler = self._all_handler[node_type]
        insight_data = self._all_data[node_type]
        data_per_day = handler.get_insight_data_per_day(insight_data)
        if not end_date:
            end_date = self._end_date
        index_mask = np.logical_and(data_per_day.index >= start_date,
                                    data_per_day.index <= end_date)
        return data_per_day[index_mask]

    def read_hourly_data(self, node_type, start_date='', end_date=''):
        """Return per-hour increment rows within [start_date, end_date].

        Returns None when *node_type* was not loaded.  Unlike
        ``read_daily_data``, the bounds are parsed as '%Y-%m-%d' dates, so
        *start_date* must be supplied (an empty string would fail in
        ``convert_str_date``).
        """
        if node_type not in self._all_handler:
            return None
        if node_type not in self._all_data:
            return None
        handler = self._all_handler[node_type]
        insight_data = self._all_data[node_type]
        increment_data = handler.read_increment_by_data(insight_data)
        hour_data = handler.get_increment_per_hour(increment_data)
        if not end_date:
            end_date = self._end_date
        start_object = mhelper.convert_str_date(start_date, '%Y-%m-%d').date()
        end_object = mhelper.convert_str_date(end_date, '%Y-%m-%d').date()
        index_mask = np.logical_and(hour_data.index.date >= start_object,
                                    hour_data.index.date <= end_object)
        return hour_data[index_mask]

    def _read_all_data(self):
        """Read and cache insight data for every configured node type."""
        for ntype in self._node_type:
            data_handler = self._handler_factory.get_insight_handler(ntype)
            file_list = self._get_file_list(ntype)
            insight_datas = [
                data_handler.read_insight_data(file_path,
                                               self._current_act_id)
                for file_path in file_list
            ]
            # Node types with no files on disk are skipped entirely.
            if not insight_datas:
                continue
            self._all_data[ntype] = pd.concat(insight_datas)
            self._all_handler[ntype] = data_handler

    def _get_file_list(self, node_type):
        """List the account's insight files for *node_type*, date-filtered."""
        node_dir = self._path_manager.get_insight_dir(node_type,
                                                      self._insight_path)
        account_dir = ospath.join(node_dir, self._current_act_id)
        node_files = fhelper.get_file_list(account_dir)
        return [path for path in node_files if self._filter_file(path)]

    def _filter_file(self, file_path):
        """Keep *file_path* when its date stamp is in the configured range.

        Files whose basename carries no YYYY-MM-DD-style stamp are always
        kept.
        """
        date_dirname = ospath.basename(file_path)
        # Raw string: '\d' in a plain literal is an invalid escape (fix).
        pattern = r'\d+-\d+-\d+'
        search_date = re.search(pattern, date_dirname)
        if not search_date:
            return True
        date_dir = search_date.group()
        # ISO-formatted dates compare correctly as plain strings.
        return self._start_date <= date_dir <= self._end_date