def get_insight_data(act_id, pmanager):
    """Build per-date, per-adset insight figures for one account.

    The ad-set insight and retention files found under the account's
    directories are merged by ``MergerDataHandler.merger_in_re_per_day`` and
    then aggregated per (index level 0, ad-set id).

    Args:
        act_id: Account id; used as the sub-directory name under both the
            insight and the retention directory.
        pmanager: Path manager exposing ``get_insight_dir``,
            ``get_retention_dir`` and ``get_conf_dir``.

    Returns:
        The aggregated frame with its index reset to columns, the date column
        named ``iconstant.FIELD_360_DATE`` (cast to ``datetime64[ns]``) and the
        ad-set name column named ``iconstant.FIELD_360_ADSET``.
    """
    node_type = mconstant.NODE_TYPE_ADSET
    insight_dir = pmanager.get_insight_dir(node_type)
    retention_dir = pmanager.get_retention_dir(node_type)
    in_act_dir = os.path.join(insight_dir, act_id)
    re_act_dir = os.path.join(retention_dir, act_id)
    in_act_files = fhelper.get_file_list(in_act_dir)
    re_act_files = fhelper.get_file_list(re_act_dir)

    config_path = pmanager.get_conf_dir()
    merger_handler = MergerDataHandler(config_path)
    merger_in_data = merger_handler.merger_in_re_per_day(
        in_act_files, re_act_files, node_type)

    # Append the ad-set id as an extra index level so it can be grouped on.
    # NOTE(review): level=[0, 2] presumes the merged frame arrives with a
    # two-level index (date first), making the ad-set id level 2 -- confirm
    # against merger_in_re_per_day.
    merger_in_data.set_index([iconstant.INSIGHT_FIELD_ADSET_ID],
                             append=True, inplace=True)
    grouped_data = merger_in_data.groupby(level=[0, 2]).agg({
        iconstant.INSIGHT_FIELD_RESULT: np.max,
        iconstant.INSIGHT_FIELD_UNIQUE_MOBILE_START: _diff_app_start,
        iconstant.INSIGHT_FIELD_ADSET_NAME: _last_value
    })

    grouped_data = grouped_data.reset_index()
    # 'level_0' is the column name reset_index gives an unnamed index level 0
    # (presumably the date level is unnamed here -- verify).
    grouped_data.rename(columns={
        'level_0': iconstant.FIELD_360_DATE,
        iconstant.INSIGHT_FIELD_ADSET_NAME: iconstant.FIELD_360_ADSET
    }, inplace=True)

    # Spell out the unit: pandas 2.x rejects the unit-less 'datetime64' dtype,
    # while older versions already resolved it to nanosecond precision.
    grouped_data[iconstant.FIELD_360_DATE] = grouped_data[
        iconstant.FIELD_360_DATE].astype('datetime64[ns]')
    return grouped_data
def merger_all_account_data(node_type):
    """Merge insight and retention files for every account and export them.

    The accounts processed are the sub-directory names found under the
    retention directory for ``node_type``. Each account's merged data is
    handed to ``ExcelExporter.export_merger_retention`` together with a
    per-account output directory, which is created first.

    Args:
        node_type: Node type; selects the insight/retention directories and
            names the output sub-directory.
    """
    env = EnvManager()
    insight_root = env.get_insight_dir(node_type)
    retention_root = env.get_retention_dir(node_type)
    account_ids = fhelper.get_subdir_name_list(retention_root)

    merger = MergerDataHandler(env.get_conf_dir())

    export_root = os.path.join(env.get_output_dir(), node_type)
    fhelper.make_dir(export_root)

    for act_id in account_ids:
        insight_files = fhelper.get_file_list(
            os.path.join(insight_root, act_id))
        retention_files = fhelper.get_file_list(
            os.path.join(retention_root, act_id))
        merged = merger.merger_in_re_per_day(
            insight_files, retention_files, node_type)

        act_export_dir = os.path.join(export_root, act_id)
        fhelper.make_dir(act_export_dir)
        # The same key id is passed for both leading arguments.
        ExcelExporter.export_merger_retention(
            dhelper.get_key_id(node_type),
            dhelper.get_key_id(node_type),
            merged,
            act_export_dir)
def read_ad_data(account_id, campaign_id, ad_name_key=''):
    """Read one account's ad insights for a campaign and export them to Excel.

    The daily ad insight files under the account directory are read with
    ``read_ad_insight_day``, grouped with ``group_by_name`` and written to a
    timestamped ``.xlsx`` file in the ad output directory.

    Args:
        account_id: Account id; names the insight sub-directory and prefixes
            the output file name.
        campaign_id: Campaign id forwarded to ``read_ad_insight_day``.
        ad_name_key: Optional name key forwarded to ``group_by_name``. When it
            is non-blank it is embedded in the file name and the grouped data
            is exported with ``ExcelExporter.export_data_excel``; when blank
            (the default, matching the previous hard-coded value) the data is
            exported per ``iconstant.NEW_FIELD_GROUP_AD_NAME`` key.
    """
    path_manager = EnvManager()
    insight_dir = path_manager.get_insight_dir(mconstant.NODE_TYPE_AD)
    account_dir = os.path.join(insight_dir, account_id)
    config_path = path_manager.get_conf_dir()
    output_path = os.path.join(path_manager.get_output_dir(),
                               mconstant.NODE_TYPE_AD)
    fhelper.make_dir(output_path)

    handler_factory = HandlerFactory(config_path)
    insight_handler = handler_factory.get_insight_handler(
        mconstant.NODE_TYPE_AD)
    in_act_files = fhelper.get_file_list(account_dir)
    data_per_day = read_ad_insight_day(in_act_files, insight_handler,
                                       campaign_id)

    # Previously ad_name_key was fixed to '' inside the function, which made
    # the keyed export branch below unreachable.
    group_data = group_by_name(data_per_day, ad_name_key)

    has_name_key = bool(ad_name_key.strip())
    name_parts = [account_id]
    if has_name_key:
        name_parts.append(ad_name_key)
    name_parts.append(str(time.time()))
    act_file = os.path.join(output_path, '_'.join(name_parts) + '.xlsx')

    if has_name_key:
        ExcelExporter.export_data_excel(group_data, act_file)
    else:
        ExcelExporter.export_excel_by_key(iconstant.NEW_FIELD_GROUP_AD_NAME,
                                          iconstant.NEW_FIELD_GROUP_AD_NAME,
                                          group_data, act_file)
def _get_file_list(self, node_type):
    """Return the current account's insight files accepted by the filter.

    Args:
        node_type: Node type whose insight directory is looked up through
            ``self._path_manager`` (together with ``self._insight_path``).

    Returns:
        A list of the paths from the account directory for which
        ``self._filter_file`` returned a truthy value, in listing order.
    """
    account_dir = ospath.join(
        self._path_manager.get_insight_dir(node_type, self._insight_path),
        self._current_act_id)

    accepted = []
    for candidate in fhelper.get_file_list(account_dir):
        if self._filter_file(candidate):
            accepted.append(candidate)
    return accepted
def read_all_account_data(node_type):
    """Run the per-day insight read for every account of ``node_type``.

    Accounts are the sub-directory names of the insight directory. For each
    one, a timestamped ``<account_id>_<time>.xlsx`` path inside the output
    directory is passed to ``read_insight_per_day`` along with the account's
    insight files.

    Args:
        node_type: Node type; selects the insight directory, the insight
            handler and the output sub-directory.
    """
    env = EnvManager()
    insight_root = env.get_insight_dir(node_type)
    account_ids = fhelper.get_subdir_name_list(insight_root)
    conf_dir = env.get_conf_dir()

    export_dir = os.path.join(env.get_output_dir(), node_type)
    fhelper.make_dir(export_dir)

    handler = HandlerFactory(conf_dir).get_insight_handler(node_type)

    for act_id in account_ids:
        day_files = fhelper.get_file_list(os.path.join(insight_root, act_id))
        workbook_name = ''.join([act_id, '_', str(time.time()), '.xlsx'])
        workbook_path = os.path.join(export_dir, workbook_name)
        read_insight_per_day(day_files, handler, workbook_path,
                             node_type, act_id)
'level_0': iconstant.FIELD_360_DATE, iconstant.INSIGHT_FIELD_ADSET_NAME: iconstant.FIELD_360_ADSET }, inplace=True) grouped_data[iconstant.FIELD_360_DATE] = grouped_data[ iconstant.FIELD_360_DATE].astype('datetime64') return grouped_data if __name__ == '__main__': path_manager = EnvManager() dir_360_retention = path_manager.get_360_retention_dir() file_list = fhelper.get_file_list(dir_360_retention) output_path = path_manager.get_output_dir() account_id = '1227059300703760' for advertiser_file in file_list: advertiser_data = get_360_retention_data(advertiser_file) insight_data = get_insight_data(account_id, path_manager) merger_data = pd.merge( advertiser_data, insight_data, how='left', on=[iconstant.FIELD_360_DATE, iconstant.FIELD_360_ADSET]) merger_data.drop( [iconstant.FIELD_360_CAMPAIGN, iconstant.INSIGHT_FIELD_ADSET_ID], axis=1, inplace=True) ExcelExporter.export_data_excel(