def read_ad_data(account_id, campaign_id, ad_name_key=''):
    """Export one account's ad-level insight data, grouped by name, to Excel.

    The insight files found under ``<ad insight dir>/<account_id>`` are read
    per day with ``read_ad_insight_day``, grouped with ``group_by_name`` and
    written to a timestamped ``.xlsx`` file under ``<output dir>/<ad type>``.

    Args:
        account_id: Name of the account sub-directory below the ad insight
            directory; also used as the prefix of the output file name.
        campaign_id: Forwarded unchanged to ``read_ad_insight_day``.
        ad_name_key: Forwarded to ``group_by_name``. When it is non-blank it
            is also embedded in the output file name and the grouped data is
            written with ``ExcelExporter.export_data_excel``; when it is
            blank (the default, which matches the previous hard-coded
            behaviour) ``ExcelExporter.export_excel_by_key`` is used instead.

    Returns:
        None. The result is the Excel file written to disk.
    """
    path_manager = EnvManager()
    insight_dir = path_manager.get_insight_dir(mconstant.NODE_TYPE_AD)
    account_dir = os.path.join(insight_dir, account_id)

    config_path = path_manager.get_conf_dir()
    output_path = os.path.join(path_manager.get_output_dir(),
                               mconstant.NODE_TYPE_AD)
    fhelper.make_dir(output_path)

    handler_factory = HandlerFactory(config_path)
    insight_handler = handler_factory.get_insight_handler(
        mconstant.NODE_TYPE_AD)

    in_act_files = fhelper.get_file_list(account_dir)

    data_per_day = read_ad_insight_day(in_act_files, insight_handler,
                                       campaign_id)
    group_data = group_by_name(data_per_day, ad_name_key)

    # The timestamp keeps repeated exports of the same account from
    # overwriting one another.
    timestamp = str(time.time())

    if ad_name_key.strip():
        file_name = account_id + '_' + ad_name_key + '_' + timestamp + '.xlsx'
        act_file = os.path.join(output_path, file_name)
        ExcelExporter.export_data_excel(group_data, act_file)
    else:
        file_name = account_id + '_' + timestamp + '.xlsx'
        act_file = os.path.join(output_path, file_name)
        # The same field constant is passed for both leading arguments, as in
        # the original call.
        ExcelExporter.export_excel_by_key(iconstant.NEW_FIELD_GROUP_AD_NAME,
                                          iconstant.NEW_FIELD_GROUP_AD_NAME,
                                          group_data, act_file)
Exemplo n.º 2
0
def merger_all_account_data(node_type):
    """Merge insight and retention files per account and export the result.

    Accounts are enumerated from the sub-directories of the retention
    directory for ``node_type``. For each one, the file lists of its insight
    and retention directories are handed to
    ``MergerDataHandler.merger_in_re_per_day`` and the merged data is exported
    with ``ExcelExporter.export_merger_retention`` into
    ``<output dir>/<node_type>/<account_id>``.

    Args:
        node_type: Node type used to pick the insight, retention and output
            directories; also forwarded to the merger and to
            ``dhelper.get_key_id``.

    Returns:
        None.
    """
    env = EnvManager()
    insight_root = env.get_insight_dir(node_type)
    retention_root = env.get_retention_dir(node_type)
    account_ids = fhelper.get_subdir_name_list(retention_root)

    merger = MergerDataHandler(env.get_conf_dir())

    export_root = os.path.join(env.get_output_dir(), node_type)
    fhelper.make_dir(export_root)

    for account_id in account_ids:
        insight_files = fhelper.get_file_list(
            os.path.join(insight_root, account_id))
        retention_files = fhelper.get_file_list(
            os.path.join(retention_root, account_id))
        merged = merger.merger_in_re_per_day(insight_files, retention_files,
                                             node_type)

        # Each account gets its own output directory.
        account_out_dir = os.path.join(export_root, account_id)
        fhelper.make_dir(account_out_dir)
        ExcelExporter.export_merger_retention(
            dhelper.get_key_id(node_type),
            dhelper.get_key_id(node_type),
            merged,
            account_out_dir,
        )
Exemplo n.º 3
0
def read_all_account_data(node_type):
    """Run ``read_insight_per_day`` for every account of ``node_type``.

    Accounts are the sub-directories of the insight directory for
    ``node_type``. For each account a timestamped ``.xlsx`` path under
    ``<output dir>/<node_type>`` is built and passed, together with the
    account's insight file list and the insight handler, to
    ``read_insight_per_day``.

    Args:
        node_type: Node type used to pick the insight and output directories
            and the insight handler.

    Returns:
        None.
    """
    env = EnvManager()
    insight_root = env.get_insight_dir(node_type)
    account_ids = fhelper.get_subdir_name_list(insight_root)

    conf_dir = env.get_conf_dir()
    export_root = os.path.join(env.get_output_dir(), node_type)
    fhelper.make_dir(export_root)

    handler = HandlerFactory(conf_dir).get_insight_handler(node_type)

    for account_id in account_ids:
        insight_files = fhelper.get_file_list(
            os.path.join(insight_root, account_id))

        # One workbook per account; the timestamp makes the name unique
        # across runs.
        workbook_name = '_'.join((account_id, str(time.time()))) + '.xlsx'
        workbook_path = os.path.join(export_root, workbook_name)

        read_insight_per_day(insight_files, handler, workbook_path,
                             node_type, account_id)
Exemplo n.º 4
0
class InsightReader:
    """Load one account's insight files and serve them filtered by date.

    All files are read once in the constructor (one concatenated data set and
    one handler per node type); the ``read_*`` methods then derive daily or
    hourly views from that cached data.
    """

    # Date token searched for in a file's base name. Raw string so that ``\d``
    # is not treated as an (invalid) string escape sequence.
    _DATE_PATTERN = r'\d+-\d+-\d+'

    def __init__(self,
                 account_id,
                 handler_factory,
                 start_date='',
                 end_date='',
                 node_types=None,
                 insight_path=''):
        """Read every matching insight file for ``account_id``.

        Args:
            account_id: Account whose sub-directory is read; also passed to
                each handler's ``read_insight_data``.
            handler_factory: Object providing ``get_insight_handler(ntype)``.
            start_date: Lower bound used when selecting files by the date in
                their name. Empty means no lower bound, because every date
                string compares greater than ``''``.
            end_date: Upper bound for file selection and the default upper
                bound of the ``read_*`` methods. When empty, the value of
                ``mhelper.get_now_date()`` is used.
            node_types: Iterable of node types to load. When falsy, campaign,
                adset and ad are loaded.
            insight_path: Forwarded to ``EnvManager.get_insight_dir``.
        """
        self._path_manager = EnvManager()
        self._handler_factory = handler_factory
        self._current_act_id = account_id
        self._all_data = {}
        self._all_handler = {}
        self._start_date = start_date
        self._end_date = end_date if end_date else mhelper.get_now_date()

        if node_types:
            self._node_type = node_types
        else:
            self._node_type = [
                mconstant.NODE_TYPE_CAMPAIGN, mconstant.NODE_TYPE_ADSET,
                mconstant.NODE_TYPE_AD
            ]

        self._insight_path = insight_path

        self._read_all_data()

    def read_daily_data(self, node_type, start_date='', end_date=''):
        """Return the per-day data of ``node_type`` within a date range.

        Args:
            node_type: Node type to read.
            start_date: Inclusive lower bound compared against the data
                index. Empty means no lower bound.
            end_date: Inclusive upper bound; defaults to the reader's end
                date when empty.

        Returns:
            The filtered per-day data, or ``None`` when nothing was loaded
            for ``node_type``.
        """
        loaded = self._get_loaded(node_type)
        if loaded is None:
            return None

        handler, insight_data = loaded
        data_per_day = handler.get_insight_data_per_day(insight_data)

        if not end_date:
            end_date = self._end_date
        index_mask = data_per_day.index <= end_date
        if start_date:
            # Only apply the lower bound when one was given: comparing the
            # index against '' is a no-op for string indexes and is not
            # meaningful for date-typed ones.
            index_mask = np.logical_and(data_per_day.index >= start_date,
                                        index_mask)
        return data_per_day[index_mask]

    def read_hourly_data(self, node_type, start_date='', end_date=''):
        """Return the per-hour increment data of ``node_type`` in a range.

        Dates are parsed with ``mhelper.convert_str_date(..., '%Y-%m-%d')``
        and compared against ``index.date`` of the hourly data.

        Args:
            node_type: Node type to read.
            start_date: Inclusive lower bound as a ``%Y-%m-%d`` string.
                Empty means no lower bound.
            end_date: Inclusive upper bound as a ``%Y-%m-%d`` string;
                defaults to the reader's end date when empty.

        Returns:
            The filtered hourly data, or ``None`` when nothing was loaded
            for ``node_type``.
        """
        loaded = self._get_loaded(node_type)
        if loaded is None:
            return None

        handler, insight_data = loaded
        increment_data = handler.read_increment_by_data(insight_data)
        hour_data = handler.get_increment_per_hour(increment_data)

        if not end_date:
            end_date = self._end_date
        hour_dates = hour_data.index.date
        end_object = mhelper.convert_str_date(end_date, '%Y-%m-%d').date()
        index_mask = hour_dates <= end_object
        if start_date:
            # An empty start_date (the default) is never handed to the date
            # parser; NOTE(review): presumably the parser rejects '' for this
            # format - confirm against mhelper.convert_str_date.
            start_object = mhelper.convert_str_date(start_date,
                                                    '%Y-%m-%d').date()
            index_mask = np.logical_and(hour_dates >= start_object,
                                        index_mask)
        return hour_data[index_mask]

    def _get_loaded(self, node_type):
        """Return ``(handler, data)`` for ``node_type`` or ``None``."""
        if (node_type not in self._all_handler
                or node_type not in self._all_data):
            return None
        return self._all_handler[node_type], self._all_data[node_type]

    def _read_all_data(self):
        """Read and concatenate the files of every configured node type.

        Node types for which no file passes the date filter are skipped, so
        they are absent from both ``_all_data`` and ``_all_handler``.
        """
        for ntype in self._node_type:
            data_handler = self._handler_factory.get_insight_handler(ntype)
            file_list = self._get_file_list(ntype)
            insight_datas = [
                data_handler.read_insight_data(file_path, self._current_act_id)
                for file_path in file_list
            ]
            if not insight_datas:
                continue

            self._all_data[ntype] = pd.concat(insight_datas)
            self._all_handler[ntype] = data_handler

    def _get_file_list(self, node_type):
        """Return the account's files for ``node_type`` that pass the filter."""
        node_dir = self._path_manager.get_insight_dir(node_type,
                                                      self._insight_path)
        account_dir = ospath.join(node_dir, self._current_act_id)
        return [
            path for path in fhelper.get_file_list(account_dir)
            if self._filter_file(path)
        ]

    def _filter_file(self, file_path):
        """Tell whether ``file_path`` belongs to the configured date range.

        Only the base name is inspected. A name without a date token is
        always accepted. Otherwise the token is compared as a plain string
        with the start and end dates (both inclusive).
        NOTE(review): string comparison only orders dates correctly for
        zero-padded ``YYYY-MM-DD`` values - confirm the file naming.
        """
        file_name = ospath.basename(file_path)
        found = re.search(self._DATE_PATTERN, file_name)
        if not found:
            return True

        return self._start_date <= found.group() <= self._end_date