def _sum_retention_by_country(source_data, sub_string, country_code):
    """Total retention rows per Date for campaigns matching *sub_string*.

    Selects rows whose 'Campaign' contains sub_string (via isContain),
    sums all numeric columns per Date, and tags each resulting row with
    country_code in a 'country' column. Returns the (possibly empty)
    grouped DataFrame.
    """
    mask = source_data['Campaign'].apply(isContain, sub_string=sub_string)
    country_sum = source_data[mask].groupby('Date').sum()
    row_num = len(country_sum.index)
    if row_num > 0:
        country_sum['country'] = pd.Series([country_code] * row_num,
                                           index=country_sum.index)
    return country_sum


def read_campaign_retention():
    """Summarise Saudi ('_SA_') and Philippine ('_ph_') campaign retention.

    Reads campaign_retention.csv from the resource directory, totals the
    retention columns per day for each country, and writes the combined
    result to sum_retention.xlsx.
    """
    path_manager = EnvManager()
    resource_path = path_manager.get_resource_dir()
    source_dir = ospath.join(resource_path, 'campaign_retention')
    source_file = ospath.join(source_dir, 'campaign_retention.csv')
    source_data = pd.read_csv(
        source_file, usecols=['Date', 'Campaign', 'Install Day', 'Day 1'])

    # The two country summaries were previously duplicated inline.
    sa_sum = _sum_retention_by_country(source_data, '_SA_', 'SA')
    ph_sum = _sum_retention_by_country(source_data, '_ph_', 'PH')

    result_data = pd.concat([sa_sum, ph_sum])

    # NOTE(review): writes to the current working directory, unlike the
    # other exporters which use the configured output dir — confirm intended.
    writer = pd.ExcelWriter('sum_retention.xlsx')
    result_data.to_excel(writer, 'retention')
    writer.save()
def read_ad_data(account_id, campaign_id, ad_name_key=''):
    """Export ad-level insight data for one account/campaign to Excel.

    account_id -- account whose insight files are read.
    campaign_id -- campaign passed to read_ad_insight_day.
    ad_name_key -- optional ad-name filter. This used to be a hard-coded
        local '' which made the name-keyed export branch unreachable; it
        is now a parameter whose default preserves the original behavior
        (export grouped by ad name).
    """
    path_manager = EnvManager()
    insight_dir = path_manager.get_insight_dir(mconstant.NODE_TYPE_AD)
    account_dir = os.path.join(insight_dir, account_id)

    config_path = path_manager.get_conf_dir()
    output_path = os.path.join(path_manager.get_output_dir(),
                               mconstant.NODE_TYPE_AD)
    fhelper.make_dir(output_path)

    handler_factory = HandlerFactory(config_path)
    insight_handler = handler_factory.get_insight_handler(
        mconstant.NODE_TYPE_AD)

    in_act_files = fhelper.get_file_list(account_dir)

    data_per_day = read_ad_insight_day(in_act_files, insight_handler,
                                       campaign_id)

    group_data = group_by_name(data_per_day, ad_name_key)

    if ad_name_key.strip():
        # A specific ad name was requested: single flat export.
        file_name = account_id + '_' + ad_name_key + '_' + str(
            time.time()) + '.xlsx'
        act_file = os.path.join(output_path, file_name)
        ExcelExporter.export_data_excel(group_data, act_file)
    else:
        # No filter: export the data keyed by the ad-name group field.
        file_name = account_id + '_' + str(time.time()) + '.xlsx'
        act_file = os.path.join(output_path, file_name)
        ExcelExporter.export_excel_by_key(iconstant.NEW_FIELD_GROUP_AD_NAME,
                                          iconstant.NEW_FIELD_GROUP_AD_NAME,
                                          group_data, act_file)
Пример #3
0
    def __init__(self,
                 account_id,
                 handler_factory,
                 start_date='',
                 end_date='',
                 node_types=None,
                 insight_path=''):
        """Cache account/date configuration and eagerly load insight data.

        end_date defaults to today when empty; node_types defaults to the
        campaign/adset/ad trio when not given.
        """
        self._path_manager = EnvManager()
        self._handler_factory = handler_factory
        self._current_act_id = account_id
        self._all_data = {}
        self._all_handler = {}
        self._start_date = start_date
        # An empty end date means "up to now".
        self._end_date = end_date if end_date else mhelper.get_now_date()

        # Fall back to every node level unless the caller restricts them.
        self._node_type = node_types if node_types else [
            mconstant.NODE_TYPE_CAMPAIGN, mconstant.NODE_TYPE_ADSET,
            mconstant.NODE_TYPE_AD
        ]

        self._insight_path = insight_path

        self._read_all_data()
Пример #4
0
 def __init__(self):
     """Resolve the log path and logger names, then configure logging.

     Reads configuration from EnvManager; falls back to this module's own
     directory when no log path is configured.
     """
     env_manager = EnvManager()
     self.log_path = env_manager.get_log_path()
     # No configured log path: write logs next to this source file.
     if not self.log_path:
         self.log_path = ospath.dirname(__file__)
     self.logger_name = env_manager.get_log_names()
     # Delegates handler/formatter setup to the class's private helper.
     self._config_log()
def analysis_typany_retention():
    """Aggregate Typany retention per (Date, Country) into typany.xlsx.

    Reads retention_typany.xlsx from the resource directory, sums every
    numeric column for each Date/Country pair, and writes the result to
    the configured output directory.
    """
    env = EnvManager()
    typany_dir = ospath.join(env.get_resource_dir(), 'typany')
    retention_file = ospath.join(typany_dir, 'retention_typany.xlsx')
    retention_data = ReadExcelDataHandler.read_typany_retention(retention_file)

    # Equivalent to a pivot table with aggfunc=sum, kept as flat columns.
    pivot_data = retention_data.groupby(['Date', 'Country'],
                                        as_index=False).sum()

    output_file = ospath.join(env.get_output_dir(), 'typany.xlsx')
    writer = pd.ExcelWriter(output_file)
    pivot_data.to_excel(writer, 'retention')
    writer.save()
Пример #6
0
def read_all_account_data(node_type):
    """Export one timestamped insight workbook per account of *node_type*.

    Scans every account subdirectory under the node type's insight dir and
    runs the per-day reader on each account's files.
    """
    env = EnvManager()
    insight_dir = env.get_insight_dir(node_type)
    account_list = fhelper.get_subdir_name_list(insight_dir)

    output_path = os.path.join(env.get_output_dir(), node_type)
    fhelper.make_dir(output_path)

    factory = HandlerFactory(env.get_conf_dir())
    insight_handler = factory.get_insight_handler(node_type)

    for account_id in account_list:
        account_files = fhelper.get_file_list(
            os.path.join(insight_dir, account_id))

        # Timestamped name so repeated runs never overwrite each other.
        target_file = os.path.join(
            output_path, account_id + '_' + str(time.time()) + '.xlsx')

        read_insight_per_day(account_files, insight_handler, target_file,
                             node_type, account_id)
Пример #7
0
def merger_all_account_data(node_type):
    """Merge per-day insight and retention data for every account.

    The account list is driven by the retention directory, so accounts
    with insight data but no retention data are skipped. One merged
    export directory is produced per account under the output path.
    """
    path_manager = EnvManager()
    insight_dir = path_manager.get_insight_dir(node_type)
    retention_dir = path_manager.get_retention_dir(node_type)
    account_list = fhelper.get_subdir_name_list(retention_dir)

    config_path = path_manager.get_conf_dir()
    merger_handler = MergerDataHandler(config_path)

    output_path = os.path.join(path_manager.get_output_dir(), node_type)
    fhelper.make_dir(output_path)

    # Loop-invariant: the export key depends only on node_type, so compute
    # it once instead of twice per account inside the loop.
    key_id = dhelper.get_key_id(node_type)

    for account_id in account_list:
        in_act_dir = os.path.join(insight_dir, account_id)
        re_act_dir = os.path.join(retention_dir, account_id)
        in_act_files = fhelper.get_file_list(in_act_dir)
        re_act_files = fhelper.get_file_list(re_act_dir)
        merger_data = merger_handler.merger_in_re_per_day(
            in_act_files, re_act_files, node_type)

        output_act_path = os.path.join(output_path, account_id)
        fhelper.make_dir(output_act_path)
        ExcelExporter.export_merger_retention(key_id, key_id, merger_data,
                                              output_act_path)
Пример #8
0
def export_spend_report(delta_days):
    """Build the fake-profit spend report for a date *delta_days* from now.

    Reads facebook_data.xlsx from the (hard-coded) fakeProfitData
    directory for the computed date, calculates income and a per-country
    pivot, and saves both back into that same directory.
    """
    path_manager = EnvManager()
    latest_date = comhelper.get_delta_date(int(delta_days))
    # HACK: hard-coded instead of path_manager.get_profit_resource_dir();
    # kept as-is because the fake-data pipeline reads/writes here.
    profit_resource_dir = ospath.join(
        "/var/www/html/eli/server/fakeProfitData", latest_date)
    facebook_file = ospath.join(profit_resource_dir, 'facebook_data.xlsx')

    profit_handler_dir = path_manager.get_profit_handler_dir()
    config_dir = ospath.join(profit_handler_dir, 'kpi_conf')
    af_handler = FakeDataHandler(config_dir, 'fake_super_conf.json')

    # Output goes to the same dated directory as the input; previously the
    # identical path join was duplicated inline.
    output_dir = profit_resource_dir

    facebook_data = ReadExcelDataHandler.read_facebook_data(facebook_file)

    calculate_data = af_handler.calculate_income(facebook_data)
    pivot_data = af_handler.group_data_by_country(calculate_data)

    _save_data_to_excel(output_dir, calculate_data, pivot_data)
Пример #9
0
def export_spend_report(delta_days):
    """Produce the profit spend report from AppsFlyer + Facebook data.

    Merges AppsFlyer retention with Facebook spend, computes income and a
    per-country pivot, and saves both to the configured output directory.
    (delta_days is currently unused in this variant.)
    """
    env = EnvManager()
    resource_dir = env.get_profit_resource_dir()
    appsflyer_file = ospath.join(resource_dir, 'retention_data.xlsx')
    facebook_file = ospath.join(resource_dir, 'facebook_data.xlsx')

    config_dir = ospath.join(env.get_profit_handler_dir(), 'kpi_conf')
    af_handler = AFDataHandler(config_dir, 'super_conf.json')

    appsflyer_data = ReadExcelDataHandler.read_appflyer_data(appsflyer_file)
    facebook_data = ReadExcelDataHandler.read_facebook_data(facebook_file)
    merge_data = af_handler.merger_data(facebook_data, appsflyer_data)

    calculate_data = af_handler.calculate_income(merge_data)
    pivot_data = af_handler.group_data_by_country(calculate_data)

    _save_data_to_excel(env.get_output_dir(), calculate_data, pivot_data)
def export_data_weekly():
    """Export one weekly insight workbook per configured account.

    Driven by insight_weekly_conf.json: for each account, reads daily and
    hourly data for every configured node type over the last full week
    (filter window: 8 days ago through 2 days ago) and writes one multi-
    sheet Excel file into a per-day output directory. Errors abort the
    whole run and are logged.

    NOTE: this function is Python 2 code (print statements,
    `except Exception, e`).
    """
    path_manager = EnvManager()
    conf_dir = path_manager.get_conf_dir()
    weekly_conf = ospath.join(conf_dir, 'insight_weekly_conf.json')
    conf_info = fhelper.read_json_file(weekly_conf)
    account_map = conf_info[conconstants.WEEKLY_ACCOUNT_ID]
    node_types = conf_info[conconstants.WEEKLY_NODE_TYPE]
    output_path = conf_info[conconstants.WEEKLY_OUTPUT_PATH]
    insight_path = conf_info[conconstants.WEEKLY_INSIGHT_PATH]
    # Empty config value falls back to the default output directory.
    if not output_path:
        output_path = path_manager.get_output_dir()
    read_start_date = conf_info[conconstants.WEEKLY_READ_START_DATE]

    # Results are grouped into a subdirectory named after today's date.
    current_day = chelper.get_now_date()
    output_path = ospath.join(output_path, current_day)
    fhelper.make_dir(output_path)

    # Read one day earlier than the filter window starts so boundary data
    # is available; the exported window itself is [-8 days, -2 days].
    if not read_start_date:
        read_start_date = chelper.get_delta_date(-9)
    filter_start_date = chelper.get_delta_date(-8)
    filter_end_date = chelper.get_delta_date(-2)

    handler_factory = HandlerFactory(conf_dir)

    try:
        for (act_id, act_desc) in account_map.items():
            # InsightReader eagerly loads all data for this account.
            reader = InsightReader(act_id,
                                   handler_factory,
                                   read_start_date,
                                   node_types=node_types,
                                   insight_path=insight_path)
            data_list = []
            sheet_name_list = []
            file_name = act_desc + '_' + act_id + '_' + filter_start_date + '_' + filter_end_date + '.xlsx'
            output_file = ospath.join(output_path, file_name)
            EliLogger.instance().info('Export account: ' + act_desc +
                                      '; outputPath: ' + output_file)
            print 'Export account: ' + act_desc + '; outputPath: ' + output_file

            # Collect a daily and an hourly sheet per node type, skipping
            # node types that produced no data.
            for ntype in node_types:
                daily_data = reader.read_daily_data(ntype, filter_start_date,
                                                    filter_end_date)
                hourly_data = reader.read_hourly_data(ntype, filter_start_date,
                                                      filter_end_date)
                if daily_data is not None and not daily_data.empty:
                    data_list.append(daily_data)
                    daily_name = inconstants.SHEET_NAME_PREFIX_DAILY + '_' + ntype
                    sheet_name_list.append(daily_name)
                if hourly_data is not None and not hourly_data.empty:
                    data_list.append(hourly_data)
                    hourly_name = inconstants.SHEET_NAME_PREFIX_HOURLY + '_' + ntype
                    sheet_name_list.append(hourly_name)
                EliLogger.instance().info('Succeed to export node type: ' +
                                          ntype)
                print 'Succeed to export node type: ' + ntype

            # Only write a workbook when at least one sheet has data.
            if len(data_list) > 0:
                ExcelExporter.export_multi_data_excel(output_file, data_list,
                                                      sheet_name_list)
    except Exception, e:
        EliLogger.instance().error('Failed to export insight analysis. ' +
                                   e.message)
        print 'Failed to export insight analysis. ' + e.message
Пример #11
0
    # NOTE(review): truncated snippet — the enclosing function header and
    # the definitions of conversion_data / install_file / output_root_path
    # are not visible here; presumably this is the body of
    # export_conversion(conversion_path, output_path) called below.
    # Strip thousands separators so the columns parse as floats.
    conversion_data['All conv.'] = conversion_data['All conv.'].apply(str.replace, args=(',', '')).apply(float)
    conversion_data['Cost'] = conversion_data['Cost'].apply(str.replace, args=(',', '')).apply(float)

    # parse_cols is the pre-0.21 pandas spelling of usecols.
    install_data = pd.read_excel(install_file, parse_cols="B, C")
    install_data['campaign'] = install_data['campaign'].apply(str)
    install_data.rename(columns={'campaign': 'Campaign ID', 'install': 'AF Install'}, inplace=True)

    # Outer join keeps campaigns present in only one of the two sources;
    # missing numbers become 0.
    merger_data = pd.merge(conversion_data, install_data, on='Campaign ID', how='outer')
    merger_data.fillna(0, inplace=True)
    merger_data['Other Conversion'] = merger_data['All conv.'] - merger_data['Conversions']
    # map(...) returns a list on Python 2; division_operation presumably
    # guards against divide-by-zero — TODO confirm.
    merger_data['Cost/AF'] = map(comhelper.division_operation, merger_data['Cost'], merger_data['AF Install'])
    merger_data['Cost/Coversion'] = map(comhelper.division_operation, merger_data['Cost'], merger_data['Conversions'])
    merger_data['Cost/All'] = map(comhelper.division_operation, merger_data['Cost'], merger_data['All conv.'])
    merger_data['Cost/Other'] = map(comhelper.division_operation, merger_data['Cost'], merger_data['Other Conversion'])

    # Fixed column order for the exported sheet ('Coversion' typo is part
    # of the runtime column name and must not be "fixed" in isolation).
    export_data = merger_data[['Campaign ID', 'Campaign', 'AF Install', 'Conversions', 'All conv.', 'Other Conversion',
                               'Cost', 'Cost/AF', 'Cost/Coversion', 'Cost/All', 'Cost/Other']]

    date_str = comhelper.get_delta_date(-1)
    file_name = 'adword_conversion_' + date_str + '.xlsx'
    export_file = ospath.join(output_root_path, file_name)
    ExcelExporter.export_data_excel(export_data, export_file)


if __name__ == '__main__':
    # Script entry: export AdWords conversion data found under
    # <resource_dir>/aw_conversion into the configured output directory.
    path_manager = EnvManager()
    resource_path = path_manager.get_resource_dir()
    conversion_path = ospath.join(resource_path, 'aw_conversion')
    output_path = path_manager.get_output_dir()
    export_conversion(conversion_path, output_path)
    # NOTE(review): truncated snippet — the enclosing function header is
    # missing; grouped_data is produced by unseen code above this line.
    grouped_data = grouped_data.reset_index()
    # Map the reset index level and the adset-name column onto the
    # 360-report field names expected by the downstream merge.
    grouped_data.rename(columns={
        'level_0':
        iconstant.FIELD_360_DATE,
        iconstant.INSIGHT_FIELD_ADSET_NAME:
        iconstant.FIELD_360_ADSET
    },
                        inplace=True)
    # Convert the date column to datetime64 so it can be merged on.
    grouped_data[iconstant.FIELD_360_DATE] = grouped_data[
        iconstant.FIELD_360_DATE].astype('datetime64')
    return grouped_data


if __name__ == '__main__':
    # NOTE(review): this snippet is cut off mid-statement — the
    # merger_data.drop(...) call below is incomplete in this scrape.

    # Merge each 360-retention advertiser file with one account's insight
    # data on (date, adset).
    path_manager = EnvManager()
    dir_360_retention = path_manager.get_360_retention_dir()
    file_list = fhelper.get_file_list(dir_360_retention)
    output_path = path_manager.get_output_dir()
    # Hard-coded account — presumably the only account with 360 data;
    # TODO confirm.
    account_id = '1227059300703760'
    for advertiser_file in file_list:
        advertiser_data = get_360_retention_data(advertiser_file)
        insight_data = get_insight_data(account_id, path_manager)
        # Left join keeps every retention row even without insight data.
        merger_data = pd.merge(
            advertiser_data,
            insight_data,
            how='left',
            on=[iconstant.FIELD_360_DATE, iconstant.FIELD_360_ADSET])
        merger_data.drop(
            [iconstant.FIELD_360_CAMPAIGN, iconstant.INSIGHT_FIELD_ADSET_ID],
            axis=1,
Пример #13
0
class InsightReader:
    """Eagerly loads insight files for one account and serves filtered views.

    All data for the requested node types (campaign/adset/ad by default)
    is read and concatenated at construction time; read_daily_data and
    read_hourly_data then only filter the cached frames by date window.
    """

    def __init__(self,
                 account_id,
                 handler_factory,
                 start_date='',
                 end_date='',
                 node_types=None,
                 insight_path=''):
        """Cache configuration and load all insight data for *account_id*.

        start_date/end_date bound which files are read (see _filter_file)
        and provide the default filtering window; end_date defaults to
        today when empty.
        """
        self._path_manager = EnvManager()
        self._handler_factory = handler_factory
        self._current_act_id = account_id
        self._all_data = {}      # node_type -> concatenated insight frame
        self._all_handler = {}   # node_type -> insight data handler
        self._start_date = start_date
        self._end_date = end_date
        if not end_date:
            self._end_date = mhelper.get_now_date()

        if node_types:
            self._node_type = node_types
        else:
            self._node_type = [
                mconstant.NODE_TYPE_CAMPAIGN, mconstant.NODE_TYPE_ADSET,
                mconstant.NODE_TYPE_AD
            ]

        self._insight_path = insight_path

        self._read_all_data()

    def read_daily_data(self, node_type, start_date='', end_date=''):
        """Return per-day data for *node_type* within [start_date, end_date].

        Both bounds are inclusive; end_date falls back to the window end
        from the constructor. Returns None when no data was loaded for
        this node type.
        """
        if node_type not in self._all_handler:
            return None

        if node_type not in self._all_data:
            return None

        handler = self._all_handler[node_type]
        insight_data = self._all_data[node_type]
        data_per_day = handler.get_insight_data_per_day(insight_data)

        if not end_date:
            end_date = self._end_date
        # Index values compare against the date strings directly —
        # assumes ISO-formatted, lexicographically ordered dates.
        index_mask = np.logical_and(data_per_day.index >= start_date,
                                    data_per_day.index <= end_date)
        return data_per_day[index_mask]

    def read_hourly_data(self, node_type, start_date='', end_date=''):
        """Return hourly increment data within the inclusive date window.

        Returns None when no data was loaded for this node type; end_date
        falls back to the constructor's window end.
        """
        if node_type not in self._all_handler:
            return None

        if node_type not in self._all_data:
            return None

        handler = self._all_handler[node_type]
        insight_data = self._all_data[node_type]
        increment_data = handler.read_increment_by_data(insight_data)

        hour_data = handler.get_increment_per_hour(increment_data)
        if not end_date:
            end_date = self._end_date
        # The hourly index is datetime-like; compare by calendar date only.
        start_object = mhelper.convert_str_date(start_date, '%Y-%m-%d').date()
        end_object = mhelper.convert_str_date(end_date, '%Y-%m-%d').date()
        index_mask = np.logical_and(hour_data.index.date >= start_object,
                                    hour_data.index.date <= end_object)
        return hour_data[index_mask]

    def _read_all_data(self):
        """Read and concatenate every in-window file for each node type."""
        for ntype in self._node_type:
            data_handler = self._handler_factory.get_insight_handler(ntype)
            file_list = self._get_file_list(ntype)
            insight_datas = [
                data_handler.read_insight_data(file_path, self._current_act_id)
                for file_path in file_list
            ]
            if not insight_datas:
                # No files for this node type: leave it out of the caches
                # so the read_* methods return None for it.
                continue

            self._all_data[ntype] = pd.concat(insight_datas)
            self._all_handler[ntype] = data_handler

    def _get_file_list(self, node_type):
        """List this account's files for *node_type*, date-filtered."""
        node_dir = self._path_manager.get_insight_dir(node_type,
                                                      self._insight_path)
        account_dir = ospath.join(node_dir, self._current_act_id)
        node_files = fhelper.get_file_list(account_dir)
        return [path for path in node_files if self._filter_file(path)]

    def _filter_file(self, file_path):
        """Keep *file_path* when its name's date lies inside the window.

        Extracts a YYYY-MM-DD-looking component from the basename; files
        without a recognizable date are always kept.
        """
        date_dirname = ospath.basename(file_path)
        # Raw string: '\d' in a plain literal is an invalid escape
        # sequence (DeprecationWarning today, a SyntaxError in future
        # Python versions).
        search_date = re.search(r'\d+-\d+-\d+', date_dirname)
        if not search_date:
            return True

        # Plain string comparison; assumes ISO-style zero-padded dates.
        return self._start_date <= search_date.group() <= self._end_date