def create_data(
    X: dt.Frame = None,
) -> Union[
    str, List[str],
    dt.Frame, List[dt.Frame],
    np.ndarray, List[np.ndarray],
    pd.DataFrame, List[pd.DataFrame],
    Dict[str, str],  # {data set names : paths}
    Dict[str, dt.Frame],  # {data set names : dt frames}
    Dict[str, np.ndarray],  # {data set names : np arrays}
    Dict[str, pd.DataFrame],  # {data set names : pd frames}
]:
    """Append two columns of random integers to ``X``.

    The new columns are named ``random_col_1`` and ``random_col_2`` and hold
    integers drawn from ``np.random.randint(0, 100)``, one value per row of
    ``X``. The columns are bound onto ``X`` itself, which is then returned.

    Args:
        X: Frame to extend. May be ``None``.

    Returns:
        An empty list when ``X`` is ``None``; otherwise ``X`` with the two
        random columns appended.

    Raises:
        ValueError: If the configured column count and the number of
            configured column names disagree.
    """
    new_names = ["random_col_1", "random_col_2"]
    n_new = 2

    # Sanity check on the two settings above; they must be edited together.
    if len(new_names) != n_new:
        raise ValueError(
            "Number of column names must be equal to number of columns.")

    if X is None:
        return []

    n_rows = X.shape[0]
    random_block = dt.Frame(np.random.randint(0, 100, size=(n_rows, n_new)))
    random_block.names = new_names

    X.cbind(random_block)
    return X
def transform(self, X: dt.Frame):
    """Expand a user-agent column into browser / OS / device features.

    Only the first column of ``X`` is inspected. When its name is one of
    ``ua``, ``user-agent``, ``user_agent`` or ``useragent``, every value is
    parsed with ``user_agents.parse`` and five columns are appended to ``X``:
    ``<col>_browser``, ``<col>_os``, ``<col>_device``, ``<col>_is_mobile``
    and ``<col>_is_tablet``.

    Missing values (``None``) produce a row of missing features instead of
    being handed to the parser.

    Args:
        X: Frame whose first column may hold user-agent strings.

    Returns:
        ``X`` with the five feature columns appended when the first column
        name is recognised; otherwise the first column as a pandas Series.
    """
    ua_column_names = ('ua', 'user-agent', 'user_agent', 'useragent')
    col_name = X.names[0]

    # Not a user-agent column: hand the first column back untouched.
    if col_name not in ua_column_names:
        return X.to_pandas().iloc[:, 0]

    # Imported once per call rather than once per parsed row.
    from user_agents import parse

    newnames = ("browser", "os", "device", "is_mobile", "is_tablet")

    def get_ua_info(ua_string):
        # A missing user agent cannot be parsed; emit missing features.
        if ua_string is None:
            return (None,) * len(newnames)
        ua = parse(ua_string)
        return (ua.browser.family, ua.os.family, ua.device.family,
                ua.is_mobile, ua.is_tablet)

    # to_list() returns one list per column; the selection has one column.
    ua_values = X[col_name].to_list()[0]

    # One tuple per input value, one tuple element per new feature column.
    feature_rows = [get_ua_info(value) for value in ua_values]
    Z = dt.Frame(feature_rows, names=[f"{col_name}_{s}" for s in newnames])

    X.cbind(Z)
    return X
def create_data(X: dt.Frame = None) -> Union[
    str, List[str],
    dt.Frame, List[dt.Frame],
    np.ndarray, List[np.ndarray],
    pd.DataFrame, List[pd.DataFrame],
    Dict[str, str],  # {data set names : paths}
    Dict[str, dt.Frame],  # {data set names : dt frames}
    Dict[str, np.ndarray],  # {data set names : np arrays}
    Dict[str, pd.DataFrame],  # {data set names : pd frames}
]:
    """Download Google Trends interest for "sanitizing wipes" in US-TX.

    Queries Google Trends through ``pytrends`` for the ``'today 5-y'``
    timeframe and builds a frame with the columns ``date``, ``gtrend`` and
    ``isPartial``. Rows whose ``isPartial`` flag is not ``'False'`` are
    dropped.

    Args:
        X: Ignored; the incoming value is replaced by the downloaded data.

    Returns:
        A dict with the single key ``"gtrends_sanitizing_wipes"`` mapping to
        the filtered frame.
    """
    pytrends = TrendReq(hl='en-US', tz=360)

    kw_list = ["sanitizing wipes"]
    geo = "US-TX"
    # An explicit range such as '2020-01-01 2020-05-08' can be used instead.
    timeframe = 'today 5-y'

    pytrends.build_payload(kw_list, timeframe=timeframe, geo=geo)
    trends = pytrends.interest_over_time()

    # With a single keyword, column 0 is the interest series and column 1
    # is the isPartial flag. The flag may arrive as a bool or as the text
    # 'True'/'False' depending on the pytrends release, so normalise it to
    # text to keep the string filter below valid in both cases.
    partial_flags = [str(flag) for flag in trends.iloc[:, 1].tolist()]

    X = dt.Frame(date=trends.index.to_list())
    X.cbind(dt.Frame(gtrend=trends.iloc[:, 0].tolist(),
                     isPartial=partial_flags))

    return {"gtrends_sanitizing_wipes": X[dt.f.isPartial == 'False', :]}