Example #1
    def get_profile_id_name_map(
        self,
        website_name: str,
    ) -> dict:
        """ Fetch id and name properties of the profile class

        Parameters
        ----------
        website_name: str, mandatory
            Your targeted website_name in the Eulerian Technologies platform

        Returns
        -------
        dict
            A dict as { "profile_id" : "profile_name" }
        """
        if not isinstance(website_name, str):
            raise TypeError("website_name should be a string")

        profile_url = f"{self._api_v2}/ea/{website_name}/db/profile/search.json"
        _json = _request._to_json(request_type="get",
                                  url=profile_url,
                                  params={
                                      "limit": 100,
                                      "output-as-kv": 1
                                  },
                                  headers=self._http_headers,
                                  print_log=self._print_log)

        return {
            row["profile_id"]: row["profile_name"]
            for row in _json["data"]["rows"]
        }
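The comprehension above assumes the output-as-kv=1 response shape, where each row arrives as a key/value dict. A minimal sketch against a mocked payload (the ids and names below are hypothetical):

mock_json = {
    "data": {
        "rows": [
            {"profile_id": "1", "profile_name": "default"},
            {"profile_id": "7", "profile_name": "marketing"},
        ]
    }
}

profile_map = {
    row["profile_id"]: row["profile_name"]
    for row in mock_json["data"]["rows"]
}
assert profile_map == {"1": "default", "7": "marketing"}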
Example #2
    def get_view_id_name_map(self, website_name: str) -> dict:
        """ Fetch attribution rules

        Parameters
        ----------
        website_name: str, mandatory
            Your targeted website_name in the Eulerian Technologies platform

        Returns
        -------
        dict
            A dict as { "view_id" : "view_name", ...}
        """
        if not isinstance(website_name, str):
            raise TypeError("website_name should be a string")

        view_url = f"{self._api_v2}/ea/{website_name}/db/view/get_all_name.json"
        view_json = _request._to_json(request_type="get",
                                      url=view_url,
                                      headers=self._http_headers,
                                      print_log=self._print_log)

        # assumes each field descriptor is exactly {"name": ...}; dict
        # equality then yields the positional index of each column
        view_id_idx = view_json["data"]["fields"].index({"name": "view_id"})
        view_name_idx = view_json["data"]["fields"].index({"name": "view_name"})
        views = {
            view[view_id_idx]: view[view_name_idx]
            for view in view_json["data"]["rows"]
        }

        if "0" not in views:
            views["0"] = "last channel"

        return views
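Without output-as-kv, rows come back positional and the column order has to be recovered from data.fields first. A sketch of the same lookup against a mocked payload (values are hypothetical):

mock_json = {
    "data": {
        "fields": [{"name": "view_id"}, {"name": "view_name"}],
        "rows": [["0", "last channel"], ["1", "first channel"]],
    }
}

view_id_idx = mock_json["data"]["fields"].index({"name": "view_id"})
view_name_idx = mock_json["data"]["fields"].index({"name": "view_name"})
views = {row[view_id_idx]: row[view_name_idx] for row in mock_json["data"]["rows"]}
assert views == {"0": "last channel", "1": "first channel"}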
Example #3
    def get_website_by_name(self, website_name: str) -> dict:
        """ Fetch website properties

        Parameters
        ----------
        website_name: str, mandatory
            Your targeted website_name in the Eulerian Technologies platform

        Returns
        -------
        dict
            A dict as { "website_prop" : "website_prop_value" }
        """
        if not isinstance(website_name, str):
            raise TypeError("website_name should be a string")

        website_url = f"{self._api_v2}/ea/{website_name}/db/website/get_me.json"
        website_json = _request._to_json(request_type="get",
                                         url=website_url,
                                         params={"output-as-kv": 1},
                                         headers=self._http_headers,
                                         print_log=self._print_log)

        d_website = website_json["data"]["rows"][0]
        if not isinstance(d_website, dict):
            raise TypeError(f"d_website={d_website} should be a dict dtype")
        return d_website
Example #4
def _get_all_paths(
        self,
        i: int,
        l_path: list,
        l_prev_path: list,
        url: str,
        payload: dict,
) -> list:
    """ Recursively expand the path template one level at a time

    Each "[%d]" segment is fanned out into one concrete path per id
    returned by the API for that level.
    """
    l_next_path = []
    for prev_path in l_prev_path:
        if not l_path[i].endswith("[%d]"):
            l_next_path.append(".".join([prev_path, l_path[i]]))

        else:
            payload["path"] = ".".join([prev_path, l_path[i].replace("[%d]", "")])
            _json = _request._to_json(
                url=url,
                request_type="get",
                headers=self._http_headers,
                params=payload,
                print_log=self._print_log
            )
            for _id in _get_ids(_json):
                l_next_path.append(".".join([prev_path, l_path[i] % int(_id)]))

    if i == len(l_path) - 1:
        return l_next_path
    i += 1
    return self._get_all_paths(
        i=i,
        l_path=l_path,
        l_prev_path=l_next_path,
        url=url,
        payload=payload)
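A self-contained sketch of the same fan-out, with the HTTP call replaced by a hypothetical lookup table of ids per path prefix:

FAKE_IDS = {"site.channel": ["1", "2"]}  # made-up ids per expandable prefix

def expand_paths(i, l_path, l_prev_path, id_lookup):
    l_next_path = []
    for prev_path in l_prev_path:
        segment = l_path[i]
        if not segment.endswith("[%d]"):
            l_next_path.append(f"{prev_path}.{segment}")
        else:
            prefix = f"{prev_path}.{segment.replace('[%d]', '')}"
            for _id in id_lookup.get(prefix, []):
                l_next_path.append(f"{prev_path}.{segment % int(_id)}")
    if i == len(l_path) - 1:
        return l_next_path
    return expand_paths(i + 1, l_path, l_next_path, id_lookup)

assert expand_paths(0, ["channel[%d]", "kpi"], ["site"], FAKE_IDS) == [
    "site.channel[1].kpi", "site.channel[2].kpi"]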
Example #5
def job_create(url, headers, query, log):
    """ Submit an EDW query job and return the parsed JSON response """
    request = {"kind": "edw#request", "query": query}
    return _request._to_json(request_type='post',
                             url=url,
                             json_data=request,
                             headers=headers,
                             print_log=log)
Example #6
def session(domain, headers, ip, log):
    """ Fetch an EDW session token for the given IP """
    url = f"{domain}/er/account/get_dw_session_token.json"
    payload = {'ip': ip}
    _json = _request._to_json(request_type="get",
                              url=url,
                              headers=headers,
                              params=payload,
                              print_log=log)
    return _json['data']['rows'][0][0]
Example #7
    def _check_credentials(self) -> None:
        """ Check credentials validity
        and set the _allowed_website_names attribute
        """
        # raise an error if API error or fail to load as json
        allowed_website_names = []
        overview_url = f"{self._api_v2}/er/account/authtree.json"
        authtree_json = _request._to_json(request_type="get",
                                          url=overview_url,
                                          headers=self._http_headers,
                                          print_log=self._print_log)

        for v in authtree_json["data"].values():
            allowed_website_names.append(v["website_name"])

        self._allowed_website_names = allowed_website_names
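The loop only relies on the website_name key of each authtree entry; a mocked illustration (keys and names are made up):

mock_authtree = {
    "data": {
        "100": {"website_name": "demo-site"},
        "101": {"website_name": "shop"},
    }
}
allowed = [v["website_name"] for v in mock_authtree["data"].values()]
assert allowed == ["demo-site", "shop"]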
Example #8
def _all_paths_to_df(
        self,
        url: str,
        date_scale: str,
        l_path: list,
        l_dim: list,
        l_kpi: list,
        payload: dict,
) -> pd.DataFrame:
    payload["ea-columns"] = "name," + ",".join([*l_dim, *l_kpi])
    if date_scale:
        del(payload["ea-columns"])
        payload["date-scale"] = date_scale
        payload["dd-dt"] = ",".join([*l_dim, *l_kpi])

    l_slice_path = []
    l_df = []
    for i in range(len(l_path)):
        l_slice_path.append(l_path[i])
        # flush the accumulated batch roughly every 10 paths, at the last
        # path, or when there is a single path
        if len(l_path) == 1 or (i and (i % 10 == 0 or i == len(l_path) - 1)):
            payload['path'] = ",".join(l_slice_path)
            _json = _request._to_json(
                url=url,
                request_type="get",
                params=payload,
                headers=self._http_headers,
                print_log=self._print_log
            )
            sub_df = pd.DataFrame(
                data=_json["data"]["rows"],
                columns=[d_field["name"] for d_field in _json["data"]["fields"]])

            l_df.append(sub_df)
            l_slice_path = []

    df_concat = pd.concat(
        objs=l_df,
        axis=0,
        ignore_index=True)

    return df_concat
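The flush condition above sends the accumulated paths in batches of roughly ten per request; the same batching reads more directly as a standalone helper (a sketch, where fetch is a hypothetical stand-in for the _request._to_json call):

import pandas as pd

def batched_frames(l_path, fetch, batch_size=10):
    # fetch takes a comma-joined path string and returns a
    # {"data": {"fields": [...], "rows": [...]}} payload
    l_df = []
    for start in range(0, len(l_path), batch_size):
        _json = fetch(",".join(l_path[start:start + batch_size]))
        l_df.append(pd.DataFrame(
            data=_json["data"]["rows"],
            columns=[f["name"] for f in _json["data"]["fields"]]))
    return pd.concat(l_df, axis=0, ignore_index=True)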
Example #9
def download_datamining(
    self,
    website_name: str,
    datamining_type: str,
    payload=None,
    status_waiting_seconds=5,
    output_directory='',
    override_file=False,
    n_days_slice=31,
):
    """ Fetch datamining data from the API into a gzip compressed CSV file

    Parameters
    ----------
    website_name : str, mandatory
        Your targeted website_name in the Eulerian Technologies platform

    datamining_type : str, mandatory
        The targeted datamining type (order, estimate, isenginerequest, actionlog, scart)

    payload : dict, optional
        The datamining payload that contains the requested data

    status_waiting_seconds: int, optional
        Waiting time in seconds between each status query

    output_directory : str, optional
        The local target directory

    override_file : bool, optional
        If set to True, overwrites output_path2file (if it exists)
            with the new datamining content
        Default: False

    n_days_slice: int, optional
        Split the datamining query into slices of n days to reduce server load
        Default: 31

    Returns
    -------
    list
        A list of path2file
    """
    if not isinstance(website_name, str):
        raise TypeError("website_name should be a str type")
    self._is_allowed_website_name(website_name)

    if not isinstance(datamining_type, str):
        raise TypeError("datamining_type should be a str type")

    if not isinstance(payload, dict) or not payload:
        raise TypeError("payload should be a non-empty dict")

    # solved a bug where the date_from and date_to of the payload were modified
    # initial payload object changed in a loop
    dc_payload = copy.deepcopy(payload)

    if not isinstance(n_days_slice, int) or n_days_slice < 1:
        raise TypeError("n_days_slice should be a positive integer")

    l_allowed_datamining_types = [
        "order", "estimate", "isenginerequest", "actionlog", "scart"
    ]

    if datamining_type not in l_allowed_datamining_types:
        raise ValueError(
            f"datamining_type={datamining_type} not allowed. "
            f"Use one of the following: {', '.join(l_allowed_datamining_types)}"
        )

    date_from = dc_payload["date-from"] if "date-from" in dc_payload else None
    if not date_from:
        raise ValueError("missing parameter=date-from in payload object")

    date_to = dc_payload['date-to'] if 'date-to' in dc_payload else None
    if not date_to:
        raise ValueError("missing parameter=date-from in payload object")

    date_format = "%m/%d/%Y"
    dt_date_from = datetime.strptime(date_from, date_format)
    dt_date_to = datetime.strptime(date_to, date_format)

    if dt_date_from > dt_date_to:
        raise ValueError("'date-from' cannot occur later than 'date-to'")

    # marketing attribution rule id, default to 0
    if "view-id" in dc_payload:
        dc_payload["view-id"] = str(dc_payload["view-id"])
        match = re.match(pattern=r'^[0-9]$', string=dc_payload["view-id"])
        if not match:
            raise ValueError("view-id should match ^[0-9]$")

    else:
        dc_payload["view-id"] = "0"

    dt_tmp_date_to = dt_date_to
    n_days_slice = timedelta(days=n_days_slice)
    one_day_slice = timedelta(days=1)
    _os._create_directory(output_directory=output_directory)
    l_path2file = []  # one output file per n_days_slice window
    # To avoid overloading the API with huge requests,
    # we split the query into smaller "n_days_slice" timeranges
    while dt_tmp_date_to <= dt_date_to:
        dt_tmp_date_to = dt_date_from + n_days_slice
        # Cannot request further than dt_date_to
        dt_tmp_date_to = dt_date_to if dt_tmp_date_to >= dt_date_to else dt_tmp_date_to
        date_from = dt_date_from.strftime(date_format)
        date_to = dt_tmp_date_to.strftime(date_format)
        date_from_file = date_from.replace("/", "_")
        date_to_file = date_to.replace("/", "_")

        output_filename = "_".join([
            website_name,
            datamining_type,
            "view",
            dc_payload["view-id"],
            "from",
            date_from_file,
            "to",
            date_to_file,
        ]) + ".csv.gz"
        output_path2file = os.path.join(output_directory, output_filename)

        if not _request._is_skippable(output_path2file=output_path2file,
                                      override_file=override_file,
                                      print_log=self._print_log):
            dc_payload['date-from'] = date_from
            dc_payload['date-to'] = date_to

            search_url = f"{self._api_v2}/ea/{website_name}/report/{datamining_type}/search.json"

            search_json = _request._to_json(request_type="get",
                                            url=search_url,
                                            params=dc_payload,
                                            headers=self._http_headers,
                                            print_log=self._print_log)

            jobrun_id = search_json["jobrun_id"]

            status_url = f"{self._api_v2}/ea/{website_name}/report/{datamining_type}/status.json"
            status_payload = {"jobrun-id": jobrun_id}
            ready = False

            if (not isinstance(status_waiting_seconds, int)
                    or status_waiting_seconds < 5):
                status_waiting_seconds = 5

            while not ready:
                self._log(f'Waiting for jobrun_id={jobrun_id} to complete')
                time.sleep(status_waiting_seconds)
                status_json = _request._to_json(request_type="get",
                                                url=status_url,
                                                params=status_payload,
                                                headers=self._http_headers,
                                                print_log=self._print_log)

                if status_json["jobrun_status"] == "COMPLETED":
                    ready = True

            download_url = f"{self._api_v2}/ea/{website_name}/report/{datamining_type}/download.json"
            download_payload = {'output-as-csv': 0, 'jobrun-id': jobrun_id}

            req = urllib.request.Request(
                url=f"{download_url}?{urllib.parse.urlencode(download_payload)}",
                headers=self._http_headers,
            )
            _stream_req(req=req, output_path2file=output_path2file)
        l_path2file.append(output_path2file)

        # last iteration, we queried up to the requested date
        if dt_tmp_date_to == dt_date_to:
            break

        # add one_day_slice to avoid querying the same day twice
        dt_date_from += n_days_slice + one_day_slice

    return l_path2file
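The windowing logic of the loop can be isolated: walk from date-from to date-to in n_days_slice-wide windows, stepping one extra day between windows so no day is queried twice. A sketch using the same date format:

from datetime import datetime, timedelta

def date_windows(date_from, date_to, n_days_slice=31, date_format="%m/%d/%Y"):
    # yields (from, to) string pairs covering [date_from, date_to]
    dt_from = datetime.strptime(date_from, date_format)
    dt_to = datetime.strptime(date_to, date_format)
    while True:
        dt_tmp_to = min(dt_from + timedelta(days=n_days_slice), dt_to)
        yield dt_from.strftime(date_format), dt_tmp_to.strftime(date_format)
        if dt_tmp_to == dt_to:
            return
        dt_from = dt_tmp_to + timedelta(days=1)

assert list(date_windows("01/01/2021", "02/15/2021")) == [
    ("01/01/2021", "02/01/2021"), ("02/02/2021", "02/15/2021")]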
Example #10
def download_flat_overview_realtime_report(
        self,
        date_from: str,
        date_to: str,
        website_name: str,
        report_name: str,
        kpi: list,
        channel: list = None,
        view_id: int = 0,
        filters: dict = None) -> pd.DataFrame:
    """ Fetch realtime report data into a pandas dataframe

    Parameters
    ----------
    date_from: str, mandatory
        mm/dd/yyyy

    date_to: str, mandatory
        mm/dd/yyyy

    website_name: str, mandatory
        Your targeted website_name in the Eulerian Technologies platform

    report_name: str, mandatory
        The targeted realtime report name

    kpi: list, mandatory
        List of kpis to request

    channel: list, optional
        List of channels (ADVERTISING...); defaults to all channels of the report

    view_id: int, optional
        Between 0 and 9

    filters: dict, optional
        To filter request results

    Returns
    -------
    pd.DataFrame
        A pandas dataframe
    """

    if not isinstance(date_from, str):
        raise TypeError("date_from should be a string dtype")

    if not isinstance(date_to, str):
        raise TypeError("date_to should be a string dtype")

    if not isinstance(website_name, str):
        raise TypeError("website_name should be a string dtype")

    if not isinstance(report_name, str):
        raise TypeError("report_name should be a string dtype")

    if not isinstance(kpi, list):
        raise TypeError("kpi should be a list dtype")

    payload = {
        'date-from': date_from,
        'date-to': date_to,
    }

    if filters:
        if not isinstance(filters, dict):
            raise TypeError(f"filters={filters} should be a dict dtype")
        filters = self.check_convert_realtime_filter(website_name, filters)
    else:
        filters = {}

    for k, v in filters.items():
        if len(v):
            payload[k] = v

    view_id = str(view_id)
    view_map = self.get_view_id_name_map(website_name)
    if view_id not in view_map:
        raise ValueError(
            f"view_id={view_id} not found. Allowed: {', '.join(view_map.keys())}"
        )

    payload["view-id"] = view_id

    d_website = self.get_website_by_name(website_name)
    url = f"{self._api_v2}/ea/{website_name}/report/realtime/{report_name}.json"
    path_module = __import__(
        name="eanalytics_api_py.internal.realtime_overview.path._" + report_name,
        fromlist=[report_name])

    l_df = []
    # we override values below, so work on a clean copy
    d_path = copy.deepcopy(path_module.d_path)

    if not channel:
        channel = list(d_path.keys())

    for _channel in channel:
        l_path = d_path[_channel]["path"]
        l_path[0] = l_path[0] % int(d_website["website_id"])

        l_dim = d_path[_channel]["dim"]
        if not isinstance(l_dim, list):
            raise TypeError(f"l_dim={l_dim} should be a list dtype")

        payload['path'] = ".".join(l_path)
        payload['ea-columns'] = ",".join([*l_dim, *kpi])

        _json = _request._to_json(url=url,
                                  request_type="get",
                                  params=payload,
                                  headers=self._http_headers,
                                  print_log=self._print_log)

        sub_df = pd.DataFrame(
            data=_json["data"]["rows"],
            columns=[d_field["name"] for d_field in _json["data"]["fields"]])

        if "add_dim_value_map" in d_path[_channel]:
            for _dim, _value in d_path[_channel]["add_dim_value_map"].items():
                sub_df[_dim] = _value

        if "rename_dim_map" in d_path[_channel]:
            sub_df.rename(columns=d_path[_channel]["rename_dim_map"],
                          inplace=True)

        # override name with alias where alias is set (non-zero)
        for name, alias in path_module.override_dim_map.items():
            if all(_ in sub_df.columns for _ in [name, alias]):
                mask = ~sub_df[alias].isin([0, '0'])
                sub_df.loc[mask, name] = sub_df[alias]
                sub_df.drop(labels=alias, axis=1, inplace=True)

        sub_df.rename(columns=path_module.dim_px_map, inplace=True)
        l_df.append(sub_df)

    df = pd.concat(l_df, axis=0, ignore_index=True)

    for col_name in df.columns:
        if col_name in path_module.dim_px_map.values():
            df[col_name] = df[col_name].astype("category")
        elif df[col_name].astype("str").str.contains(".", regex=False).any():
            df[col_name] = df[col_name].astype("float64")
        else:
            df[col_name] = df[col_name].astype("int64")

    return df
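The final dtype pass uses a simple heuristic: known dimension columns become categories, columns whose values contain a '.' become float64, everything else int64. The same pass isolated as a sketch (the sample frame is made up):

import pandas as pd

def infer_dtypes(df, dim_cols):
    # dim_cols: set of columns to treat as categorical dimensions
    for col in df.columns:
        if col in dim_cols:
            df[col] = df[col].astype("category")
        elif df[col].astype("str").str.contains(".", regex=False).any():
            df[col] = df[col].astype("float64")
        else:
            df[col] = df[col].astype("int64")
    return df

df = infer_dtypes(
    pd.DataFrame({"channel": ["seo", "sea"], "clicks": ["10", "2"], "cost": ["1.5", "0.2"]}),
    dim_cols={"channel"})
assert str(df["cost"].dtype) == "float64" and str(df["clicks"].dtype) == "int64"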
Example #11
def job_status(url, headers, log):
    """ Fetch the JSON status of an EDW job """
    return _request._to_json(request_type='get',
                             url=url,
                             headers=headers,
                             print_log=log)
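Taken together, the module-level helpers above outline the EDW job lifecycle: fetch a session token, post the query, then poll the job. A hypothetical wiring (the domain, job endpoint and response field names are assumptions, not the documented API):

domain = "https://api.example.com"             # made-up domain
headers = {"Authorization": "Bearer <token>"}  # made-up auth header

token = session(domain, headers, ip="203.0.113.1", log=True)
job = job_create(f"{domain}/edw/jobs", headers, query="...", log=True)  # assumed endpoint
status = job_status(job["status_url"], headers, log=True)               # assumed field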