Пример #1
0
    def create_data(
        X: dt.Frame = None
    ) -> Union[str, List[str], dt.Frame, List[dt.Frame], np.ndarray,
               List[np.ndarray], pd.DataFrame, List[pd.DataFrame], Dict[
                   str, str],  # {data set names : paths}
               Dict[str, dt.Frame],  # {data set names : dt frames}
               Dict[str, np.ndarray],  # {data set names : np arrays}
               Dict[str, pd.DataFrame],  # {data set names : pd frames}
               ]:
        """Bind two columns of random integers onto ``X`` and return it.

        The new columns are named ``random_col_1`` and ``random_col_2`` and
        are filled by ``np.random.randint(0, 100, ...)`` with one value per
        row of ``X``. They are attached with ``X.cbind`` and the same ``X``
        object is returned.

        Args:
            X: frame to extend; when ``None`` nothing is generated.

        Returns:
            An empty list when ``X`` is ``None``, otherwise ``X``.

        Raises:
            ValueError: if the configured column count and the number of
                configured column names disagree.
        """
        new_names = ["random_col_1", "random_col_2"]
        n_new = 2

        # Keep the two settings above in sync when editing this recipe.
        if len(new_names) != n_new:
            raise ValueError(
                "Number of column names must be equal to number of columns.")

        # No input frame: there is nothing to attach the columns to.
        if X is None:
            return []

        n_rows = X.shape[0]
        noise = np.random.randint(0, 100, size=(n_rows, n_new))
        extra = dt.Frame(noise)
        extra.names = new_names
        X.cbind(extra)

        return X
Пример #2
0
    def transform(self, X: dt.Frame):
        """Expand a user-agent column into browser/OS/device features.

        Only the first column of ``X`` is inspected. If its name is one of
        the recognised user-agent spellings, every value is parsed with
        ``user_agents.parse`` and five columns named ``<col>_browser``,
        ``<col>_os``, ``<col>_device``, ``<col>_is_mobile`` and
        ``<col>_is_tablet`` are bound onto ``X`` with ``X.cbind``; ``X`` is
        then returned. For any other column name the first column is
        returned as a pandas Series, untouched.
        """
        recognised = ['ua', 'user-agent', 'user_agent', 'useragent']
        first_col = X.names[0]

        # Guard clause: not a user-agent column, pass the data through.
        if first_col not in recognised:
            return X.to_pandas().iloc[:, 0]

        def describe(raw_ua):
            # Imported lazily so the dependency is only needed when a
            # user-agent value is actually parsed.
            from user_agents import parse
            parsed = parse(raw_ua)
            return (
                parsed.browser.family,
                parsed.os.family,
                parsed.device.family,
                parsed.is_mobile,
                parsed.is_tablet,
            )

        suffixes = ("browser", "os", "device", "is_mobile", "is_tablet")
        feature_names = [f"{first_col}_{s}" for s in suffixes]

        # to_list() yields one list per column; take the single column.
        raw_values = X[first_col].to_list()[0]
        parsed_rows = list(map(describe, raw_values))

        features = dt.Frame(parsed_rows, names=feature_names)
        X.cbind(features)
        return X
    def create_data(X: dt.Frame = None) -> Union[
        str, List[str],
        dt.Frame, List[dt.Frame],
        np.ndarray, List[np.ndarray],
        pd.DataFrame, List[pd.DataFrame],
        Dict[str, str],  # {data set names : paths}
        Dict[str, dt.Frame],  # {data set names : dt frames}
        Dict[str, np.ndarray],  # {data set names : np arrays}
        Dict[str, pd.DataFrame],  # {data set names : pd frames}
    ]:
        """Fetch Google Trends interest for "sanitizing wipes" as a frame.

        Queries pytrends (``hl='en-US'``, ``tz=360``) for the keyword
        "sanitizing wipes" with ``geo="US-TX"`` and ``timeframe='today 5-y'``,
        then builds a datatable frame with the columns ``date`` (the result
        index), ``gtrend`` (first result column) and ``isPartial`` (second
        result column). Only rows whose ``isPartial`` equals ``'False'`` are
        kept.

        Args:
            X: ignored; accepted only to match the recipe signature.

        Returns:
            A dict with the single key ``"gtrends_sanitizing_wipes"`` mapped
            to the filtered frame.
        """
        # Effective query settings. Alternatives that were listed here but
        # never in effect: geo ["US-TX", "US-CA", "US-NY"] and
        # timeframe '2020-01-01 2020-05-08'.
        kw_list = ["sanitizing wipes"]
        geo = "US-TX"
        timeframe = 'today 5-y'

        pytrends = TrendReq(hl='en-US', tz=360)
        pytrends.build_payload(kw_list, timeframe=timeframe, geo=geo)
        trends = pytrends.interest_over_time()

        # Positional access assumes a single keyword, so column 0 is the
        # interest series and column 1 the partial-period flag.
        # TODO confirm if kw_list ever grows.
        frame = dt.Frame(
            date=trends.index.to_list(),
            gtrend=trends.iloc[:, 0].tolist(),
            isPartial=trends.iloc[:, 1].tolist(),
        )

        # NOTE(review): the filter compares against the string 'False';
        # this presumes pytrends delivers isPartial as strings rather than
        # booleans -- confirm against the installed pytrends version.
        complete_rows = frame[dt.f.isPartial == 'False', :]

        return {"gtrends_sanitizing_wipes": complete_rows}