def ggl_trends(grouped, keyword):
    """Join bucket-averaged Google Trends interest for ``keyword`` onto ``grouped``.

    US interest over the whole available history is fetched, averaged per
    ``pd.Grouper(freq='1m')`` bucket, and the keyword column is renamed to
    ``'Google Trends'`` before an inner join with ``grouped`` on the index.

    Returns an empty ``DataFrame`` when Google Trends yields no data.
    """
    client = TrendReq(hl='en-US', tz=360)
    client.build_payload([keyword], cat=0, timeframe='all', geo='US', gprop='')
    interest = client.interest_over_time()
    if interest.empty:
        return pd.DataFrame()

    averaged = interest.groupby(pd.Grouper(freq='1m')).mean()
    averaged = averaged.rename(columns={keyword: 'Google Trends'})
    return grouped.merge(averaged, how='inner', left_index=True, right_index=True)
def regions(l_args, s_ticker):
    """Plot a bar chart of the regions showing the highest Google interest in a ticker.

    Parameters
    ----------
    l_args : list of str
        Command-line style arguments; ``-n/--num`` sets how many regions to
        plot (default 10).
    s_ticker : str
        Ticker symbol used as the Google Trends keyword.

    Notes
    -----
    Any exception raised while parsing, querying or plotting is printed
    instead of being propagated.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="regions",
        description="""Plot bars of regions based on stock's interest. [Source: Google]""",
    )
    parser.add_argument(
        "-n",
        "--num",
        action="store",
        dest="n_num",
        type=check_positive,
        default=10,
        help="number of regions to plot that show highest interest.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        pytrend = TrendReq()
        pytrend.build_payload(kw_list=[s_ticker])
        df_interest_region = pytrend.interest_by_region()
        df_interest_region = df_interest_region.sort_values(
            [s_ticker], ascending=False).head(ns_parser.n_num)

        plt.figure(figsize=(25, 5))
        plt.title(f"Top's regions interest on {s_ticker}")
        plt.bar(df_interest_region.index, df_interest_region[s_ticker], width=0.8)
        # The on/off flag is passed positionally: its keyword name changed
        # from ``b`` to ``visible`` across Matplotlib releases.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.ylabel("Interest [%]")
        plt.xlabel("Region")
        plt.show()
        print("")

    except Exception as e:
        print(e)
        print("")
def fetch(self, keyword):
    """Classify how "hot" ``keyword`` currently is on Google Trends.

    Interest over the last 365 days is requested for ``self.country`` and the
    second-to-last data point is bucketed: above 70 is ``'Hot'``, above 50 is
    ``'High'``, anything else is ``'Medium'``.

    Returns
    -------
    dict
        Keys ``keyword``, ``source``, ``type``, ``Nature`` and ``value``.
        ``Nature`` and ``value`` are both ``'No Data'`` when Google Trends
        returns fewer than two data points.
    """
    import datetime

    today = datetime.date.today()
    year_ago = today - datetime.timedelta(days=365)
    # Both dates are zero-padded ISO dates, the 'YYYY-MM-DD YYYY-MM-DD'
    # form that pytrends documents for explicit timeframes.
    timeframe = '{} {}'.format(year_ago.isoformat(), today.isoformat())

    pytrends = TrendReq(hl='en-US', tz=360)
    pytrends.build_payload([keyword], cat=0, timeframe=timeframe,
                           geo=self.country, gprop='')
    d = pytrends.interest_over_time()

    if len(d) < 2:
        return {
            'keyword': keyword,
            'source': 'googletrends',
            'type': 'hotness',
            'Nature': 'No Data',
            'value': 'No Data'
        }

    # ``.iloc`` makes the positional lookup explicit; plain ``[-2]`` on a
    # date-indexed Series relies on a deprecated positional fallback.
    t = int(d[keyword].iloc[-2])
    if t > 70:
        nature = 'Hot'
    elif t > 50:
        nature = 'High'
    else:
        nature = 'Medium'

    return {
        'keyword': keyword,
        'source': 'googletrends',
        'type': 'hotness',
        'Nature': nature,
        'value': t
    }
def trender(a, term, term_no):
    """Copy all-time GB interest for the keywords in ``term`` into ``result``.

    ``result`` is a module-level 2D array mutated in place: row ``b`` gets the
    timestamp string in column 0 and the interest of ``term[k]`` (as a string)
    in column ``a + k + 1``.

    Parameters
    ----------
    a : int
        Column offset of this keyword group inside ``result``.
    term : list of str
        Keywords for one Google Trends request.
    term_no : object
        Progress marker printed once the group has been written.
    """
    pytrends = TrendReq(hl='en-US', tz=360)
    pytrends.build_payload(term, cat=0, timeframe='all', geo='GB', gprop='')
    interest = pytrends.interest_over_time()

    # Set the values at the correct indices in the 2D result array.
    for b, (index, row) in enumerate(interest.iterrows()):
        result[b][0] = str(index)
        for offset, name in enumerate(term, start=1):
            result[b][a + offset] = str(row[name])

    # Parenthesised form is valid on both Python 2 and Python 3.
    print(term_no)
def getTrendData(keyword, year=YEAR, month=MONTH):
    """Fetch Google Trends interest for ``keyword`` up to the end of a month.

    The window starts on the first day of ``month`` in ``year - 1`` and ends
    on the last day of ``month`` in ``year``.

    Returns
    -------
    pandas.DataFrame
        Interest over time without the ``isPartial`` column, or an empty
        ``DataFrame`` when Google Trends returns no data.
    """
    pytrends = TrendReq(hl='en-US', tz=360)

    end_date = date(year, month, getLastDayOfMonth(year, month))
    start_date = date(year - 1, month, 1)
    timeframe = (start_date.strftime('%Y-%m-%d') + ' '
                 + end_date.strftime('%Y-%m-%d'))

    pytrends.build_payload(kw_list=[keyword], timeframe=timeframe)
    data = pytrends.interest_over_time()
    if data.empty:
        # Concatenating an empty list of frames would raise ValueError.
        return pd.DataFrame()
    return data.drop(labels=['isPartial'], axis='columns')
def prediction(key_word):
    """Plot worldwide interest for ``key_word`` with a 2-point moving average.

    Covers 2021-01-01 to 2021-01-15 and saves the figure to
    ``template/static/images/prediction.png``.
    """
    client = TrendReq(hl='en-US', tz=360)
    client.build_payload([key_word], cat=0,
                         timeframe='2021-01-01 2021-01-15', gprop='', geo='')
    std = pd.DataFrame.from_dict(client.interest_over_time())

    std['Moving Average'] = std[key_word].rolling(2).mean()
    plotted_columns = [key_word, 'Moving Average']
    std[plotted_columns].plot(figsize=(10, 4))

    plt.grid(True)
    plt.title(key_word + " Google Trends Moving Averages")
    plt.axis('tight')
    plt.ylabel('Searches')
    plt.savefig('template/static/images/prediction.png')
    plt.close()
class GoogleTrendStatsEvaluator(StatsSocialEvaluator):
    """Social evaluator fed by the recent Google Trends interest in ``self.symbol``.

    Uses the pytrends library (https://github.com/GeneralMills/pytrends); see
    https://github.com/GeneralMills/pytrends/blob/master/examples/example.py
    """

    def __init__(self):
        super().__init__()
        self.pytrends = None
        self.is_threaded = False

    def get_data(self):
        """Create the pytrends client and register the query for ``self.symbol``.

        A ``ResponseError`` raised while building the payload is logged as a
        warning and otherwise ignored.
        """
        self.pytrends = TrendReq(hl='en-US', tz=0)

        key_words = [self.symbol]
        try:
            # Only 1- and 3-month windows seem to work with this syntax.
            time_frame = "today " + str(self.social_config[STATS_EVALUATOR_HISTORY_TIME]) + "-m"

            # Caution: the hourly request limit appears to be fairly low.
            self.pytrends.build_payload(kw_list=key_words, cat=0, timeframe=time_frame, geo='', gprop='')
        except ResponseError as e:
            # ``warn`` is a deprecated alias of ``warning``.
            self.logger.warning(str(e))

    def eval_impl(self):
        """Set ``self.eval_note`` from recent changes in the interest series."""
        interest_over_time_df = self.pytrends.interest_over_time()

        self.eval_note = AdvancedManager.get_class(self.config, StatisticAnalysis).analyse_recent_trend_changes(
            interest_over_time_df[self.symbol], numpy.sqrt)

    def run(self):
        pass

    def load_config(self):
        """Load the configuration and cap the history length at the allowed maximum."""
        super().load_config()
        if self.social_config[STATS_EVALUATOR_HISTORY_TIME] > STATS_EVALUATOR_MAX_HISTORY_TIME:
            self.social_config[STATS_EVALUATOR_HISTORY_TIME] = STATS_EVALUATOR_MAX_HISTORY_TIME

    def set_default_config(self):
        """Install the default refresh rate (3600) and history length (3)."""
        self.social_config = {
            CONFIG_REFRESH_RATE: 3600,
            STATS_EVALUATOR_HISTORY_TIME: 3
        }
def return_graph(self):
    """Build a Dash graph of Google Trends interest for one or five anime titles.

    When ``self.top_bool`` is set, the five top 'tv' titles are plotted;
    otherwise only the title resolved from ``self.anime_name``. Each title is
    queried in its own request over ``'today ' + self.time_scale``.
    """
    trend_client = TrendReq(hl='en-US', tz=360)

    if self.top_bool:
        titles = [self.get_top_anime_names(rank, 'tv') for rank in range(0, 5)]
    else:
        titles = [self.search_anime(self.anime_name)]

    # One single-keyword request per title, keyed by its position.
    frames = {}
    for position, title in enumerate(titles):
        trend_client.build_payload([title],
                                   timeframe='today ' + self.time_scale,
                                   geo='')
        frames[position] = trend_client.interest_over_time()

    trendframe = pd.concat(frames, axis=1)
    trendframe.columns = trendframe.columns.droplevel(0)
    trendframe = trendframe.drop('isPartial', axis=1)

    traces = [
        go.Scatter(x=trendframe.index,
                   y=trendframe[col],
                   name=col,
                   line=dict(color=self.graph_color))
        for col in trendframe.columns
    ]
    layout = dict(
        paper_bgcolor='#27293d',
        plot_bgcolor='rgba(0,0,0,0)',
        font=dict(color='white'),
        showlegend=True)

    return dcc.Graph(id=self.graph_id, figure={'data': traces, 'layout': layout})
def get_searches(key_word):
    """Plot worldwide interest for ``key_word`` from 2020-01-01 to 2021-01-15.

    Prints the head of the data and saves the line plot to
    ``template/static/images/search.png``.
    """
    client = TrendReq(hl='en-US', tz=360)
    client.build_payload([key_word], cat=0,
                         timeframe='2020-01-01 2021-01-15', gprop='', geo='')
    df = client.interest_over_time()
    print(df.head())

    sns.set()
    df['timestamp'] = pd.to_datetime(df.index)
    sns.lineplot(x=df['timestamp'], y=df[key_word])

    plt.title("Normalized Searches for {}".format(key_word))
    plt.ylabel("Number of Searches")
    plt.xlabel("Date")
    plt.savefig("template/static/images/search.png")
    plt.close()
def get_trend_df_list(init_trend_list, time_frame):
    """Fetch US interest over time for many keywords, one frame per keyword.

    Keywords are queried in batches of 5 (via ``Functions.group_list_by_size``)
    and every keyword column of every batch is returned as its own
    single-column ``DataFrame``.

    Parameters
    ----------
    init_trend_list : list of str
        Keywords to query.
    time_frame : str
        pytrends timeframe string.

    Returns
    -------
    list of pandas.DataFrame
        A batch for which Google Trends has no data contributes nothing.
    """
    trend_list_list = Functions.group_list_by_size(init_trend_list, 5)
    py_trends = TrendReq(hl='en-US', tz=360, timeout=(10, 25))

    trends_df_list = []
    for trend_list in trend_list_list:
        py_trends.build_payload(trend_list, cat=0, timeframe=time_frame,
                                geo="US", gprop="")
        trends_df = py_trends.interest_over_time()
        # An empty result has no 'isPartial' column; a strict drop would
        # raise KeyError.
        trends_df = trends_df.drop(["isPartial"], axis=1, errors="ignore")
        trends_df_list.extend(trends_df[[col]] for col in trends_df.columns)

    return trends_df_list
def get_keywords(request):
    """Render ``index.html``; on a valid POST include the interest table as HTML.

    A non-POST request renders an unbound form, and an invalid POST re-renders
    the bound form.
    """
    if request.method != 'POST':
        return render(request, 'index.html', {'form': KeyWords()})

    form = KeyWords(request.POST)
    if not form.is_valid():
        return render(request, 'index.html', {'form': form})

    # The submitted text is split on whitespace into separate keywords.
    keywords = form.cleaned_data['keywords'].split()
    pytrend = TrendReq()
    pytrend.build_payload(kw_list=keywords)
    interest_html = pytrend.interest_over_time().to_html()

    context = {'data': interest_html, 'form': form}
    return render(request, 'index.html', context)
def update_trends(proxies: List[str] = ["http://179.108.169.71:8080"],
                  topics=topics,
                  countries=countries) -> None:
    """Build a one-week, worldwide Trends payload per (topic, country) pair.

    For each country its language is looked up with ``state_lang`` and the
    keyword list for that language is loaded with ``load_kws``. ``proxies`` is
    accepted but not used by this function.
    """
    for _topic in topics:
        for country in countries:
            # The language code should match an entry of LANGUAGES.txt.
            lang = state_lang(country)
            keywords = load_kws(kw_tmpl + lang + ext)

            client = TrendReq(hl=lang, tz=0)
            client.build_payload(keywords, cat=0, timeframe='today 1-w',
                                 geo='', gprop='')
def trends(topic):
    """Download month-by-month interest for ``topic`` and save it as CSV.

    One request is made per calendar month from January 2011 to September
    2018; the running row count and the requested window are printed after
    each request. The combined data is written to ``bitcoin.csv``.
    """
    month_starts = pd.date_range(start='1/1/2011', end='9/1/2018', freq='MS')
    month_ends = pd.date_range(start='1/1/2011', end='10/1/2018', freq='M')
    pytrends = TrendReq(hl='en-US', tz=0)
    kw_list = [topic]

    monthly_frames = []
    total_rows = 0
    for first_day, last_day in zip(month_starts, month_ends):
        # First and last day of the same month.
        frame = first_day.strftime('%Y-%m-%d') + " " + last_day.strftime('%Y-%m-%d')
        pytrends.build_payload(kw_list, cat=0, timeframe=frame, geo='', gprop='')
        interest_over_time_df = pytrends.interest_over_time()

        monthly_frames.append(interest_over_time_df)
        total_rows += len(interest_over_time_df.index)
        print(total_rows)
        print(frame)

    # ``DataFrame.append`` no longer exists in pandas 2.x; concatenate once.
    df = pd.concat(monthly_frames)
    df.to_csv('bitcoin.csv', sep=',', encoding='utf-8')
def _fetch_data(trendreq: TrendReq,
                kw_list: list[str],
                timeframe: str = 'today 3-m',
                cat: int = 0) -> pd.DataFrame:
    """Download Google Trends data, retrying when a ``ResponseError`` occurs.

    The payload is built up to three times, waiting 60 and then 65 seconds
    between attempts.

    Raises
    ------
    ce.RateLimited
        When the third attempt also fails; the last ``ResponseError`` is
        attached as the cause.
    """
    max_attempts = 3
    attempts = 0
    while True:
        try:
            trendreq.build_payload(
                kw_list=kw_list, timeframe=timeframe, cat=cat, geo='', gprop='')
        except ResponseError as e:
            print(e)
            attempts += 1
            # Give up before sleeping: waiting ahead of an abort is pointless.
            if attempts >= max_attempts:
                print('Failed after 3 attempts, abort fetching.')
                raise ce.RateLimited from e
            delay = 60 + 5 * (attempts - 1)
            print(f'Trying again in {delay} seconds.')
            sleep(delay)
        else:
            return trendreq.interest_over_time()
def GoogleTrendsSlopeCalculator(request):
    """Load the keyword CSV and print the head of a sample interest query.

    The keyword CSV is expected to have a single column headed ``Keywords``
    (case sensitive). The sample query uses 'pizza' and 'bagel'.
    """
    # Placeholders for the Gmail username and password.
    google_username = "******"
    google_password = "******"
    connector = TrendReq(google_username, google_password)

    # Directory where downloaded CSV files should be stored.
    path = ""

    # Location of the one-column keyword CSV.
    keywordcsv = "http://localhost:8000/static/keywords.csv"
    keywords = pd.read_csv(keywordcsv)

    # Building the payload captures the API tokens that
    # interest_over_time(), interest_by_region() and related_queries() need.
    pytrend = TrendReq()
    pytrend.build_payload(kw_list=['pizza', 'bagel'])

    interest_over_time_df = pytrend.interest_over_time()
    print(interest_over_time_df.head())
async def get_trends(item: Item):
    """Compare the latest news-search interest for ``item.mensagem`` with the day before.

    Queries Brazilian Google News interest over the last month and compares
    the last two data points.

    Returns
    -------
    dict
        The two interest values as strings plus a sentence stating whether
        the latest value is higher than the previous one.

    Raises
    ------
    KeyError
        When Google Trends returns no data for the keyword.
    IndexError
        When fewer than two data points are returned.
    """
    pytrend = TrendReq(hl='pt-BR', tz=360)
    pytrend.build_payload(kw_list=[item.mensagem], cat=0, timeframe='today 1-m',
                          geo='BR', gprop='news')
    series = pytrend.interest_over_time()[item.mensagem]

    # Address the last two rows relative to the end: the number of rows in a
    # one-month window is not fixed, so absolute positions 29/28 are unsafe.
    today_value = series.iloc[-1]
    yesterday_value = series.iloc[-2]

    if today_value > yesterday_value:
        resultado = 'Os interesse de pesquisa de hoje foram maior que ontem'
    else:
        resultado = 'Os interesse de pesquisa de hoje foram menor que ontem'

    return {
        "interesse de pesquisa relativo hoje": str(today_value),
        "interesse de pesquisa relativo ontem": str(yesterday_value),
        "variação": resultado
    }
def PytrendJob():
    """Save the last week of Korean interest for two shopping sites to a dated CSV.

    The CSV is named ``<today>.csv`` and holds both keyword columns plus, for
    each, the integer difference from the previous row (0 for the first row).
    """
    pytrend = TrendReq(tz=540)

    now = datetime.now()
    todayDate = now.strftime('%Y-%m-%d')
    lastWeekDate = (now - timedelta(days=7)).strftime('%Y-%m-%d')
    period = lastWeekDate + ' ' + todayDate

    keywords = ['쿠팡', '11번가']
    # The computed one-week window is actually requested; without an explicit
    # timeframe pytrends would use its own default range.
    pytrend.build_payload(kw_list=keywords, timeframe=period, geo='KR')
    dataCrawling = pytrend.interest_over_time()

    # Copy so the new columns are added to an independent frame.
    data = pd.DataFrame(dataCrawling)[keywords].copy()
    data['쿠팡증감률'] = data['쿠팡'].diff().fillna(0).astype(int)
    data['11번가증감률'] = data['11번가'].diff().fillna(0).astype(int)

    csvFileName = todayDate + '.csv'
    data.to_csv(csvFileName, index=True)
    print(todayDate + ": Google Trends Crawling ok…")
def explain_unemployment(rollingwindow=12):
    """Compare unemployment models with and without Google Trends data.

    Loads the BLS series, fetches all-time 'unemployment' search interest per
    geography, plots the US series, fits a base model and a model including
    the Google data via ``linearmodel``, and plots the results.

    Returns
    -------
    tuple
        ``(basestatsdf, statsdf)``: statistics of the base model and of the
        model including Google Trends.
    """
    # Map BLS series ids to state names, plus the nation-wide series.
    codes = pd.read_csv(_rootPath + _statecodes)
    series_ids = codes['Code'].apply(
        lambda code: 'LAUST' + str(code).zfill(2) + '0000000000003')
    statemap = dict(zip(series_ids, codes['State']))
    statemap[_nationcode] = 'US'

    actualdata = pd.read_excel(_rootPath + _inputfile, 'BLS Data Series',
                               header=3, index_col=0)
    actualdata = actualdata.T.rename(columns=statemap)

    # Switch from state names to abbreviations.
    name_to_abbr = {name: abbr for abbr, name in _statesabr.items()}
    actualdata = actualdata.rename(columns=name_to_abbr)
    actualdata = actualdata.drop(['PR', 'DC'], axis=1)  # Puerto Rico and DC
    actualdata = actualdata.dropna(how='all')

    # One 'unemployment' query per geography.
    pytrend = TrendReq()
    interest_by_geo = {}
    for geo in actualdata.columns:
        region = geo if geo == 'US' else 'US-' + geo
        pytrend.build_payload(kw_list=['unemployment'], timeframe='all',
                              geo=region)
        interest_by_geo[geo] = pytrend.interest_over_time()['unemployment']

    # Lag by one row: values are stamped first-of-month but should be
    # last-of-month.
    inputdata = pd.DataFrame(interest_by_geo).shift().dropna(how='all')

    # Sample plot of the raw US series.
    raw_sample = pd.DataFrame({
        'Unemployment Rate': actualdata['US'],
        'Unemployment Google Trends': inputdata['US'],
    })
    raw_sample.plot(secondary_y=['Unemployment Google Trends'])

    basestatsdf, basefitted = linearmodel(actualdata, inputdata, rollingwindow,
                                          usegoogle=False)
    statsdf, fitted = linearmodel(actualdata, inputdata, rollingwindow,
                                  usegoogle=True, stdize=True)

    statsdf.loc['Google_Tstat'].plot(kind='bar')
    plt.tight_layout()

    fitted_sample = pd.DataFrame({
        'Unemployment Rate': actualdata['US'],
        'Fitted (Base Model)': basefitted['US'],
        'Fitted (Including Google)': fitted['US'],
    })
    fitted_sample.plot()

    return basestatsdf, statsdf
def trackerfunc(driver_list, verbose=1):
    """Collect Google interest by region for a list of F1 drivers.

    Drivers are queried in batches of at most five keywords with a 10 second
    pause before each request. A batch that fails is reported and skipped.

    Parameters
    ----------
    driver_list : list of str
        Driver names to search for.
    verbose : int
        Values above 1 print a start message.

    Returns
    -------
    pandas.DataFrame
        Interest by region for every successful batch with a ``datetime``
        column set to ``DATE``; empty when there was nothing to query.
    """
    if verbose > 1:
        print('Starting trackerfunc')

    max_google_request = 5
    full_results_region = pd.DataFrame()

    # Stepping through the list covers every driver, including a final short
    # batch, and never puts more than five keywords in one request.
    for start in range(0, len(driver_list), max_google_request):
        pytrend = TrendReq()
        iter_drivers = driver_list[start:start + max_google_request]
        time.sleep(10)
        try:
            pytrend.build_payload(kw_list=iter_drivers)
            df_region = pytrend.interest_by_region()
            full_results_region = pd.concat([full_results_region, df_region])
            full_results_region['datetime'] = DATE
            print('full_results len', len(full_results_region))
            print('COMPLETE:', iter_drivers)
        except Exception:
            print('ERROR', len(iter_drivers))

    return full_results_region
def getTrendData(keyword, timeframe=timeFrame):
    """Return the percentage change in US interest between the start and end of a window.

    Parameters
    ----------
    keyword : str
        Keyword to get trend data for.
    timeframe : str
        pytrends timeframe string.

    Returns
    -------
    yoyIncrease : float
        Percentage change of the mean of the last four rows relative to the
        mean of the first four rows.

    Raises
    ------
    ValueError
        When Google Trends returns no data for the keyword.
    """
    pytrends = TrendReq(hl='en-US', tz=360)
    # Use the caller's ``timeframe`` rather than the module-level default.
    pytrends.build_payload(kw_list=[keyword], timeframe=timeframe, geo='US')
    data = pytrends.interest_over_time()

    if data.empty:
        raise ValueError(
            'Google Trends returned no data for {!r}'.format(keyword))

    dataset = data.drop(labels=['isPartial'], axis='columns')

    # First four rows versus last four rows of the window.
    lastYear = dataset.head(4).mean(axis=0).iloc[0]
    thisYear = dataset.tail(4).mean(axis=0).iloc[0]

    yoyIncrease = ((thisYear - lastYear) / lastYear) * 100
    return yoyIncrease
def start(search_words, start_date):
    """Download hourly historical interest for ``search_words`` up to today.

    ``start_date`` is read by position as ``YYYY-MM-DD``. The result is
    written to ``google_results.csv`` and its first 10 rows are printed.
    """
    print("... google module started")
    now = datetime.datetime.now()

    # Search parameters.
    kw_list = [search_words]
    year_start = int(start_date[:4])
    month_start = int(start_date[5:7])
    day_start = int(start_date[8:10])
    print(kw_list, " ", year_start, " ", month_start, " ", day_start)

    # The search ends at today's date.
    year_end, month_end, day_end = now.year, now.month, now.day
    print(year_end, " ", month_end, " ", day_end)

    pytrend = TrendReq()
    pytrend.build_payload(kw_list)
    search_results = pytrend.get_historical_interest(
        kw_list,
        year_start, month_start, day_start, 0,
        year_end, month_end, day_end, 0,
        cat=0, geo='', gprop='', sleep=0)

    search_results.to_csv('google_results.csv')
    print(search_results.head(10))
def calculate_interest_over_time(request_user, account_user, niches, network):
    """Return a thinned interest-over-time series for an account's niches.

    At most four niche keywords are queried; the account's username is added
    as the first keyword when the requesting user is authenticated and is a
    superuser, an assistant, or has opened the verified account.

    Returns
    -------
    tuple
        ``(x_axis, cols)``: ``x_axis`` lists the timestamp ``value`` of every
        kept row and ``cols`` maps each column header to its values on those
        rows. ``(None, None)`` when the Google Trends request fails.
    """
    verified_acc = VerifiedUserAccounts.objects.filter(
        network=network, account_id=account_user.id).first()
    niches = _build_niche_str_arr(niches, verified_acc, network)[:4]

    # NOTE(review): ``is_authenticated`` is called as a method here; confirm
    # this matches the Django version in use.
    if request_user.is_authenticated() and (
            request_user.is_superuser
            or is_assistant(request_user)
            or verified_acc in request_user.opened_accounts.all()):
        niches.insert(0, account_user.username)

    try:
        # SECURITY: account credentials are hard-coded in source; they should
        # be loaded from configuration or the environment instead.
        pytrend = TrendReq('*****@*****.**', 'shoutourbiz123',
                           hl='en-US', tz=360, custom_useragent=None)
        pytrend.build_payload(kw_list=niches)
        df = pytrend.interest_over_time()
    except Exception:
        return (None, None)

    cols = {header: [] for header in df.dtypes.index}
    x_axis = []

    for ndx, (index, row) in enumerate(df.iterrows()):
        # Thin the series: skip rows whose position is a multiple of 2, 3, 5
        # or 7 (a multiple of 4 is already a multiple of 2).
        if ndx % 2 == 0 or ndx % 3 == 0 or ndx % 5 == 0 or ndx % 7 == 0:
            continue
        if index.value not in x_axis:
            x_axis.append(index.value)
        # Iterating the dict directly works on both Python 2 and Python 3,
        # unlike ``iteritems()``.
        for key in cols:
            cols[key].append(row[key])

    return (x_axis, cols)
def pytrends_pull(self, query: list, query_loc: str, start_yr: int,
                  start_mo: int, end_yr: int, end_mo: int, type: str):
    """Fetch Google Trends interest for ``query`` at the requested granularity.

    Parameters
    ----------
    query : list
        Keywords to search for.
    query_loc : str
        Key into ``self.us_states`` (case-insensitive); its ``'abbr'`` entry
        is used as the geo code of the hourly query.
    start_yr, start_mo, end_yr, end_mo : int
        The range runs from the first day of the start month to the last day
        of the end month.
    type : str
        ``'hour'`` uses ``get_historical_interest`` over the range, ``'day'``
        uses ``interest_over_time`` over the range, ``'week'`` uses
        ``interest_over_time`` with the pytrends default timeframe.

    Returns
    -------
    The DataFrame returned by pytrends.

    Raises
    ------
    ValueError
        When ``type`` is not one of ``'hour'``, ``'day'`` or ``'week'``.
    """
    # Reject unknown granularities up front instead of failing with an
    # unbound local after a network session has been opened.
    if type not in ('hour', 'day', 'week'):
        raise ValueError(
            "type must be 'hour', 'day' or 'week', got {!r}".format(type))

    geocode = self.us_states[query_loc.lower()]['abbr']

    # Connection to trends.google.com.
    pytrend = TrendReq(timeout=(10, 25))

    last_day = calendar.monthrange(end_yr, end_mo)[1]
    startdate = datetime.datetime(year=start_yr, month=start_mo, day=1)
    enddate = datetime.datetime(year=end_yr, month=end_mo, day=last_day, hour=23)

    if type == 'hour':
        return pytrend.get_historical_interest(
            keywords=query,
            cat=0,
            geo=geocode,
            year_start=start_yr,
            month_start=start_mo,
            day_start=1,
            hour_start=0,
            year_end=end_yr,
            month_end=end_mo,
            day_end=last_day,
            hour_end=0)

    if type == 'day':
        timeframe = '{} {}'.format(startdate.strftime("%Y-%m-%d"),
                                   enddate.strftime("%Y-%m-%d"))
        pytrend.build_payload(kw_list=query, timeframe=timeframe)
        return pytrend.interest_over_time()

    # type == 'week'
    pytrend.build_payload(kw_list=query)
    return pytrend.interest_over_time()
def gtrend_getvalue(kw_list,output_file,timeframe):
    """
    Fetch data from Google Trends with the pytrends library and export it.

    Related queries, related topics, interest by region, interest over time
    and suggestions are exported under names prefixed with the first keyword.
    Any exception is caught and its traceback printed.

    pytrends ref: https://pypi.org/project/pytrends/#interest-by-region
    """
    try:
        prefix = kw_list[0]
        client = TrendReq(hl='ja-JP', tz=360)
        client.build_payload(kw_list, cat=0, timeframe=timeframe, geo='JP', gprop='')

        # Related queries
        exportdata(client.related_queries(), output_file, prefix + 'query', 1)

        # Related topics
        exportdata(client.related_topics(), output_file, prefix + 'topic', 1)

        # Interest by region
        by_region = client.interest_by_region(resolution='REGION',
                                              inc_low_vol=True,
                                              inc_geo_code=False)
        exportdata(by_region, output_file, prefix + 'region', 0)

        # Interest over time
        exportdata(client.interest_over_time(), output_file, prefix + 'overtime', 0)

        # Suggestions
        suggest_to_excel(client.suggestions(prefix), output_file, prefix + 'suggestions')

    except Exception as e:
        t, v, tb = sys.exc_info()
        print(traceback.format_exception(t,v,tb))
        print(traceback.format_tb(e.__traceback__))
def google_index(word="python", start_date="2019-12-01", end_date="2019-12-04", plot=True):
    """
    Return the Google Trends index of ``word`` for the given date range.

    When ``plot`` is true the series is also plotted and shown.
    """
    client = TrendReq(hl="en-US", tz=360)
    timeframe = start_date + " " + end_date
    client.build_payload([word], cat=0, timeframe=timeframe, geo="", gprop="")
    search_df = client.interest_over_time()

    if plot:
        search_df[word].plot()
        plt.legend()
        plt.show()

    return search_df[word]
def get_search_interest_over_time(keyword_list, country_iso2, timeframe='today 3-m'):
    """Return the mean interest score of the first result column.

    Queries interest over ``timeframe`` for ``keyword_list`` in
    ``country_iso2`` and averages the first column over all rows.
    Returns 0 when there are no rows.
    """
    from pytrends.request import TrendReq

    client = TrendReq()
    client.build_payload(keyword_list, cat=0, timeframe=timeframe,
                         geo=country_iso2, gprop='')
    scores = client.interest_over_time().to_numpy()

    # Total of the first column across the whole window.
    if scores.size > 0:
        total = scores.sum(axis=0)[0]
    else:
        total = 0

    # Number of data points.
    row_count = scores.shape[0]
    if row_count == 0:
        return 0
    return total / row_count
def daily_google_interests(currData):
    """Return the last day of Turkish "dolar" interest rescaled to ``currData``'s range.

    The Google Trends series is min-max scaled so that its minimum maps to
    ``min(currData)`` and its maximum to ``max(currData)``.

    Parameters
    ----------
    currData : sequence of numbers
        Reference data whose value range is the target range.

    Returns
    -------
    list of float
        One rescaled value per data point. When the series is constant,
        every value maps to ``min(currData)``.

    Raises
    ------
    KeyError
        When Google Trends returns no data.
    """
    pytrends = TrendReq(hl='tr-TR', tz=360)
    pytrends.build_payload(
        kw_list=["dolar"], cat=0, timeframe='now 1-d', geo='TR', gprop='')
    data = pytrends.interest_over_time()

    data_list = [float(value) for value in pd.DataFrame(data)['dolar']]

    old_min = min(data_list)
    old_span = max(data_list) - old_min
    new_min = min(currData)
    new_span = max(currData) - new_min

    converted_list = []
    for item in data_list:
        # A constant series has zero span; pin the ratio to 0 rather than
        # dividing by zero.
        ratio = (item - old_min) / old_span if old_span else 0.0
        converted_list.append(ratio * new_span + new_min)
    return converted_list
def build_my_payload(qlist, timeframe, pytrendobj=None):
    """Build a pytrends payload, retrying with exponential random backoff.

    Parameters
    ----------
    qlist : list of str
        Keywords for the payload.
    timeframe : str
        pytrends timeframe string.
    pytrendobj : TrendReq, optional
        Existing client to reuse; when falsy a new one is created with a
        random custom user agent.

    Returns
    -------
    The client with the payload built, or ``None`` after five failed tries.
    """
    ntries = 5
    for i in range(ntries):
        try:
            print('Try number %d: Building payload with qlist "%s" and timeframe "%s"' % (i+1, qlist, timeframe))
            if not pytrendobj:
                custom_useragent = random_word(8)
                print(' Building TrendReq() object from scratch with custom_useragent %s' % custom_useragent)
                # Use the user agent that was just logged.
                pytrendobj = TrendReq(google_username, google_password, custom_useragent=custom_useragent)

            # Wait some time to keep from getting blocked: exponential
            # random backoff.
            sleeptime = (2**i)*random.randint(1,10)
            print('Sleeping %d seconds exponential random backoff to avoid getting blocked' % sleeptime)
            time.sleep(sleeptime)

            print(' Building payload with qlist "%s" and timeframe "%s"' % (qlist, timeframe))
            pytrendobj.build_payload(kw_list=qlist, timeframe=timeframe)
            return pytrendobj
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C during the
            # long sleeps still interrupts the program.
            print(traceback.format_exc())
            print(" Failed to build payload, probably couldn't get token, trying again...")

    print(" Failed to build payload after %d tries, giving up" % ntries)
    return None
def get_searches(key_word, state):
    """Add a line of ``key_word`` interest in a US state to the current figure.

    Covers 2020-02-01 to 2020-03-10 for geo ``US-<state>``. The head of the
    data is printed; the figure is neither shown nor saved here.

    Parameters
    ----------
    key_word : str
        Keyword to query.
    state : str
        State code appended to ``'US-'``.
    """
    pytrends = TrendReq(hl='en-US', tz=360)
    pytrends.build_payload([key_word], cat=0,
                           timeframe='2020-02-01 2020-03-10',
                           gprop='', geo='US-{}'.format(state))
    df = pytrends.interest_over_time()
    print(df.head())

    sns.set()
    df['timestamp'] = pd.to_datetime(df.index)
    # Recent seaborn releases reject positional x/y vectors, so they are
    # passed by keyword.
    sns.lineplot(x=df['timestamp'], y=df[key_word])

    plt.title(
        "Normalized Searches for Coronavirus in NY (blue), MA (orange), and CA (green)")
    plt.ylabel("Number of Searches")
    plt.xlabel("Date")
    plt.xticks(rotation=45)
class DataLoader:
    """Price history of one ticker plus a Google Trends client."""

    def __init__(self, key):
        self.stock = yfinance.Ticker(key)
        self.hist = self.stock.history(period="max")
        self.feature_selection()
        self.pytrend = TrendReq()

    def feature_selection(self):
        """Drop the dividend and stock-split columns from the history in place."""
        unused_columns = ['Dividends', 'Stock Splits']
        self.hist.drop(unused_columns, axis=1, inplace=True)

    def printhist(self):
        """Print the price history."""
        print(self.hist)

    def gtrends(self):
        """Plot all-time Google Trends interest for the first search key."""
        search_keys = ["china", "trump"]
        self.pytrend.build_payload(kw_list=search_keys, timeframe='all')
        interest = self.pytrend.interest_over_time()
        ts = interest.reset_index(col_fill="date", inplace=False)
        sns.lineplot(x="date", y=search_keys[0], data=ts)
        plt.show()
from pytrends.request import TrendReq

# Create the client once; every request below reuses its session.
pytrend = TrendReq()

# Build the payload and capture the API tokens. This is only needed for
# interest_over_time(), interest_by_region() and related_queries().
pytrend.build_payload(kw_list=['pizza', 'bagel'])

# Interest over time.
interest_over_time_df = pytrend.interest_over_time()
print(interest_over_time_df.head())

# Interest by region.
interest_by_region_df = pytrend.interest_by_region()
print(interest_by_region_df.head())

# Related queries: a dictionary of dataframes.
related_queries_dict = pytrend.related_queries()
print(related_queries_dict)

# Google Hot Trends data.
trending_searches_df = pytrend.trending_searches()
print(trending_searches_df.head())

# Google Top Charts.
top_charts_df = pytrend.top_charts(cid='actors', date=201611)
print(top_charts_df.head())

# Google keyword suggestions.
suggestions_dict = pytrend.suggestions(keyword='pizza')
def test_top_charts(self):
    """``top_charts`` returns a result for the 'actors' chart dated 201611."""
    client = TrendReq()
    client.build_payload(kw_list=['pizza', 'bagel'])
    charts = client.top_charts(cid='actors', date=201611)
    self.assertIsNotNone(charts)
## FIRST RUN ## # Login to Google. Only need to run this once, the rest of requests will use the same session. pytrend = TrendReq() # Run the first time (if we want to start from today, otherwise we need to ask for an end_date as well today = datetime.today().date() old_date = today # Go back in time new_date = today - timedelta(days=step) # Create new timeframe for which we download data timeframe = new_date.strftime('%Y-%m-%d')+' '+old_date.strftime('%Y-%m-%d') pytrend.build_payload(kw_list=kw_list, timeframe = timeframe) interest_over_time_df = pytrend.interest_over_time() ## RUN ITERATIONS while new_date>start_date: ### Save the new date from the previous iteration. # Overlap == 1 would mean that we start where we # stopped on the iteration before, which gives us # indeed overlap == 1. old_date = new_date + timedelta(days=overlap-1) ### Update the new date to take a step into the past # Since the timeframe that we can apply for daily data # is limited, we use step = maxstep - overlap instead of
def test_build_payload(self):
    """Building a payload populates ``token_payload``."""
    client = TrendReq()
    client.build_payload(kw_list=['pizza', 'bagel'])
    payload = client.token_payload
    self.assertIsNotNone(payload)
def test_interest_by_region(self):
    """``interest_by_region`` returns a result once a payload is built."""
    client = TrendReq()
    client.build_payload(kw_list=['pizza', 'bagel'])
    by_region = client.interest_by_region()
    self.assertIsNotNone(by_region)
def test_trending_searches(self):
    """``trending_searches`` returns a result for ``pn='p1'``."""
    client = TrendReq()
    client.build_payload(kw_list=['pizza', 'bagel'])
    trending = client.trending_searches(pn='p1')
    self.assertIsNotNone(trending)
def test_related_queries(self):
    """``related_queries`` returns a result once a payload is built."""
    client = TrendReq()
    client.build_payload(kw_list=['pizza', 'bagel'])
    related = client.related_queries()
    self.assertIsNotNone(related)
def get_google_trends(self, kw_list, trdays=250, overlap=100, cat=0, geo='', tz=360, gprop='', hl='en-US', sleeptime=1, isPartial_col=False, from_start=False, scale_cols=True):
    """Retrieve daily Google Trends data for a list of search terms.

    The range ``self.from_date`` to ``self.to_date`` is split into overlapping
    searches of ``trdays`` days; each new search is rescaled against the rows
    it shares with the data collected so far and then appended.

    Parameters
    ----------
    kw_list : list of search terms (1 to 5) - see pyTrends for more details
    trdays : the number of days to pull data for in a search
        (values above 270 are rejected)
    overlap : the number of overlapped days when stitching two searches
        together (must be smaller than ``trdays``)
    cat : category to narrow results - see pyTrends for more details
    geo : two letter country abbreviation (e.g. 'US', 'UK')
        default is '', which returns global results - see pyTrends for more
        details
    tz : timezone offset (default is 360) - see pyTrends for more details
    gprop : filter results to a specific Google property
        default is '', which refers to web searches - see pyTrends for more
        details
    hl : language (e.g. 'en-US' (default), 'es') - see pyTrends for more
        details
    sleeptime : when stitching multiple searches, the number of seconds to
        wait before each additional search
    isPartial_col : keep the isPartial column (default is False, i.e. the
        column is removed)
    from_start : when stitching multiple results, reverse the order in which
        the search windows are processed (default is False, meaning the most
        recent window is fetched first)
    scale_cols : when True (default), divide every keyword column by the
        largest value across all keyword columns and multiply by 100, since
        stitching can produce values greater than 100

    Returns
    -------
    pandas DataFrame
        Sorted by date according to ``self.ascending``. When building a
        payload raises, or the first search returns no rows, a one-row
        DataFrame with a single ``error`` column is returned instead.

    Raises
    ------
    ValueError
        If ``kw_list`` is empty or has more than 5 terms, if ``trdays``
        exceeds 270, or if ``overlap`` is not smaller than ``trdays``.

    Notes
    -----
    This method is essentially a highly restricted wrapper for the pytrends
    package. Any issues/questions related to its use would probably be more
    likely resolved by consulting the pytrends github page
    https://github.com/GeneralMills/pytrends
    """
    if len(kw_list)>5 or len(kw_list)==0:
        raise ValueError("The keyword list can contain at most 5 words")
    if trdays>270:
        raise ValueError("trdays must not exceed 270")
    if overlap>=trdays:
        raise ValueError("Overlap can't exceed search days")
    # number of days each successive search window is shifted by
    stich_overlap = trdays - overlap
    from_date = datetime.datetime.strptime(self.from_date, '%Y-%m-%d')
    to_date = datetime.datetime.strptime(self.to_date, '%Y-%m-%d')
    n_days = (to_date - from_date).days
    # launch pytrends request
    _pytrends = TrendReq(hl=hl, tz=tz)
    # get the dates for each search
    if n_days <= trdays:
        trend_dates = [' '.join([self.from_date, self.to_date])]
    else:
        # windows are generated backwards in time, starting at to_date
        trend_dates = ['{} {}'.format(
            (to_date - datetime.timedelta(i+trdays)).strftime("%Y-%m-%d"),
            (to_date - datetime.timedelta(i)).strftime("%Y-%m-%d"))
            for i in range(0,n_days-trdays+stich_overlap,
                           stich_overlap)]
    if from_start:
        trend_dates = trend_dates[::-1]
    try:
        _pytrends.build_payload(kw_list, cat=cat, timeframe=trend_dates[0],
                                geo=geo, gprop=gprop)
    except Exception as e:
        return pd.DataFrame({"error":e}, index=[0])
    output = _pytrends.interest_over_time().reset_index()
    if len(output)==0:
        return pd.DataFrame({"error":'search term returned no results (insufficient data)'}, index=[0])
    for date in trend_dates[1:]:
        time.sleep(sleeptime)
        try:
            _pytrends.build_payload(kw_list, cat=cat, timeframe=date,
                                    geo=geo, gprop=gprop)
        except Exception as e:
            return pd.DataFrame({"error":e}, index=[0])
        temp_trend = _pytrends.interest_over_time().reset_index()
        # after the merge, '<kw>_x' is the new search and '<kw>_y' the data
        # collected so far (NaN on dates not yet in ``output``)
        temp_trend = temp_trend.merge(output, on="date", how="left")
        # it's ugly but we'll exploit the common column names
        # and then rename the underscore containing column names
        for kw in kw_list:
            # mean ratio of existing to new values over the shared dates
            norm_factor = np.ma.masked_invalid(temp_trend[kw+'_y']/temp_trend[kw+'_x']).mean()
            temp_trend[kw] = temp_trend[kw+'_x'] * norm_factor
        # keep only rows containing a null value, i.e. dates that were
        # missing from ``output``
        temp_trend = temp_trend[temp_trend.isnull().any(axis=1)]
        temp_trend['isPartial'] = temp_trend['isPartial_x']
        output = pd.concat([output, temp_trend[['date', 'isPartial'] + kw_list]], axis=0)

    # put the columns in a fixed order: date, isPartial, then the keywords
    output = output[['date', 'isPartial']+kw_list]

    if not isPartial_col:
        output = output.drop('isPartial', axis=1)
    output = output[output['date']>=self.from_date]
    if scale_cols:
        # the values in each column are relative to other columns
        # so we need to get the maximum value across the search columns
        max_val = float(output[kw_list].values.max())
        for col in kw_list:
            output[col] = 100.0*output[col]/max_val
    output = output.sort_values('date', ascending=self.ascending).reset_index(drop=True)
    return output
def test_suggestions(self):
    """``suggestions`` returns a result for the keyword 'pizza'."""
    client = TrendReq()
    client.build_payload(kw_list=['pizza', 'bagel'])
    suggested = client.suggestions(keyword='pizza')
    self.assertIsNotNone(suggested)
def get_trends_over_time(keywords, timeframe, cat=0, geo='GB', gprop=''):
    """Return interest over time for ``keywords``, pausing 5 seconds afterwards.

    Prints "Attempt made" before the request is issued.
    """
    print("Attempt made")

    client = TrendReq(hl='en-US', tz=-60)
    client.build_payload(keywords, cat=cat, timeframe=timeframe, geo=geo,
                         gprop=gprop)
    trends = client.interest_over_time()

    # Pause after each request before handing the dataframe back.
    time.sleep(5)
    return trends