def ggl_trends(grouped, keyword):
    """Merge monthly-averaged Google Trends interest for *keyword* into *grouped*.

    Queries US interest over the full available timeframe, averages it per
    month, and inner-joins it onto *grouped* by index. Returns an empty
    DataFrame when Google Trends has no data for the keyword.
    """
    trend_client = TrendReq(hl='en-US', tz=360)
    trend_client.build_payload([keyword], cat=0, timeframe='all', geo='US', gprop='')
    interest = trend_client.interest_over_time()
    if interest.empty:
        return pd.DataFrame()
    # Monthly means, with the keyword column renamed for the merged output.
    monthly = (interest
               .groupby(pd.Grouper(freq='1m'))
               .mean()
               .rename(columns={keyword: 'Google Trends'}))
    return grouped.merge(monthly, left_index=True, right_index=True, how='inner')
class GoogleTrendStatsEvaluator(StatsSocialEvaluator):
    """Social evaluator scoring recent Google Trends interest for ``self.symbol``.

    Uses the pytrends lib (https://github.com/GeneralMills/pytrends)
    https://github.com/GeneralMills/pytrends/blob/master/examples/example.py
    """

    def __init__(self):
        super().__init__()
        self.pytrends = None
        self.is_threaded = False

    def get_data(self):
        """Create the pytrends session and build the payload for the symbol."""
        self.pytrends = TrendReq(hl='en-US', tz=0)
        # self.pytrends.GENERAL_URL = "https://trends.google.com/trends/explore"
        # self.symbol
        key_words = [self.symbol]
        try:
            # looks like only 1 and 3 months are working ...
            time_frame = "today " + str(self.social_config[STATS_EVALUATOR_HISTORY_TIME]) + "-m"
            # NOTE: Google Trends enforces a fairly low per-hour request quota.
            self.pytrends.build_payload(kw_list=key_words, cat=0, timeframe=time_frame, geo='', gprop='')
        except ResponseError as e:
            # FIX: Logger.warn() is deprecated in the stdlib logging API;
            # warning() is the supported spelling.
            self.logger.warning(str(e))

    def eval_impl(self):
        """Score recent trend changes of the interest-over-time series."""
        interest_over_time_df = self.pytrends.interest_over_time()
        # compute bollinger bands
        self.eval_note = AdvancedManager.get_class(self.config, StatisticAnalysis).analyse_recent_trend_changes(
            interest_over_time_df[self.symbol], numpy.sqrt)

    def run(self):
        pass

    # check if history is not too high
    def load_config(self):
        super(GoogleTrendStatsEvaluator, self).load_config()
        if self.social_config[STATS_EVALUATOR_HISTORY_TIME] > STATS_EVALUATOR_MAX_HISTORY_TIME:
            self.social_config[STATS_EVALUATOR_HISTORY_TIME] = STATS_EVALUATOR_MAX_HISTORY_TIME

    def set_default_config(self):
        # Refresh every hour; default to a 3-month history window.
        self.social_config = {
            CONFIG_REFRESH_RATE: 3600,
            STATS_EVALUATOR_HISTORY_TIME: 3
        }
def get_data(self):
    """Create the pytrends session and build the payload for ``self.symbol``."""
    self.pytrends = TrendReq(hl='en-US', tz=0)
    # self.pytrends.GENERAL_URL = "https://trends.google.com/trends/explore"
    # self.symbol
    key_words = [self.symbol]
    try:
        # looks like only 1 and 3 months are working ...
        time_frame = "today " + str(self.social_config[STATS_EVALUATOR_HISTORY_TIME]) + "-m"
        # NOTE: Google Trends enforces a fairly low per-hour request quota.
        self.pytrends.build_payload(kw_list=key_words, cat=0, timeframe=time_frame, geo='', gprop='')
    except ResponseError as e:
        # FIX: Logger.warn() is deprecated in the stdlib logging API;
        # warning() is the supported spelling.
        self.logger.warning(str(e))
import pandas as pd import time from pytrends.request import TrendReq startTime = time.time() pytrends = TrendReq(hl='en-US', tz=360) colnames = ["keywords"] df = pd.read_csv("keyword_list.csv", names=colnames) df2 = df["keywords"].values.tolist() df2.remove("Keywords") dataset = [] #data set for this years data datasetLY = [] # data set for last years data for x in range(0, len(df2)): keywords = [df2[x]] pytrends.build_payload(kw_list=keywords, cat=0, timeframe='2020-09-01 2020-09-30', geo='US') data = pytrends.interest_by_region(resolution='COUNTRY', inc_low_vol=False, inc_geo_code=False) if not data.empty: # data = data.drop(labels = ['isPartial'], axis ='columns') dataset.append(data) for x in range(0, len(df2)): keywords = [df2[x]] pytrends.build_payload(kw_list=keywords,
def test_ispartial_dtype_timeframe_all(self):
    """The isPartial column must be boolean when timeframe='all'."""
    req = TrendReq()
    req.build_payload(kw_list=['pizza', 'bagel'], timeframe='all')
    frame = req.interest_over_time()
    assert ptypes.is_bool_dtype(frame.isPartial)
from opinionytics.all_views.signup_view import signup_view
from opinionytics.all_views.text_results_view import text_results_view
from opinionytics.all_views.url_results_view import url_results_view
from opinionytics.all_views.data_results_view import data_results_view

# Connect to the Aylien API
client = textapi.Client(TEXT_API_ID, TEXT_API_KEY)

# Connect to the NLP library
natural_language_understanding = NaturalLanguageUnderstandingV1(
    username=NLP_API_USERNAME,
    password=NLP_API_PASSWORD,
    version=NLP_API_VERSION)

# Use Google Trends API
pytrends = TrendReq(hl='en-US', tz=360)

# Get all features from the APIs
all_features = AllFeatures(client, pytrends, natural_language_understanding)


# Go to the analyze view
def analyze(request):
    """Render the generic analyze page."""
    return render(request, 'analyze.html')


# Go to the text analyze view
def analyze_text(request):
    """Render the text-analyze page."""
    return render(request, 'analyze-text.html')
import pandas as pd import pytrends import seaborn as sns import matplotlib.pyplot as plt from pytrends.request import TrendReq pytrend = TrendReq(hl='nl') KEYWORDS = ['Asperge', 'Spruitjes', 'Pompoen'] KEYWORDS_CODES = [pytrend.suggestions(keyword=i)[0] for i in KEYWORDS] df_CODES = pd.DataFrame(KEYWORDS_CODES) # print(df_CODES) EXACT_KEYWORDS = df_CODES['mid'].to_list() DATE_INTERVAL = '2020-01-01 2021-01-01' COUNTRY = [ "GB", "DE", "NL" ] #Use this link for iso country code: https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes CATEGORY = 0 # Use this link to select categories: https://github.com/pat310/google-trends-api/wiki/Google-Trends-Categories SEARCH_TYPE = '' #default is 'web searches',others include 'images','news','youtube','froogle' (google shopping) Individual_EXACT_KEYWORD = list(zip(*[iter(EXACT_KEYWORDS)] * 1)) Individual_EXACT_KEYWORD = [list(x) for x in Individual_EXACT_KEYWORD] dicti = {} i = 1 for Country in COUNTRY: for keyword in Individual_EXACT_KEYWORD: pytrend.build_payload(kw_list=keyword, timeframe=DATE_INTERVAL, geo=Country, cat=CATEGORY,
def _fetch_batches(treq, names, populate_time, populate_region):
    """Query Google Trends for *names* in batches of 5 and store the results.

    Five keywords is the Google Trends per-payload maximum. Errors in one
    batch are printed and the remaining batches still run (best-effort).
    """
    for i in range(0, len(names), 5):
        # FIX: the original sliced [i:i+4] while stepping by 5, silently
        # dropping every 5th name, and mishandled the final partial batch
        # (missing colon in the negative slice). Plain [i:i+5] covers both.
        batch = names[i:i + 5]
        try:
            print("Fetch for " + ", ".join(batch))
            treq.build_payload(kw_list=batch)
            populate_time(treq.interest_over_time())
            populate_region(treq.interest_by_region())
        except Exception as ex:
            print(ex)


def start():
    """Fetch Google Trends interest for all languages and frameworks.

    Reads Google credentials from the environment, opens a pytrends session,
    then downloads interest-over-time / interest-by-region for every stored
    Language and LibraryOrFramework name.
    """
    google_usr = ""
    google_pwd = ""
    treq = None
    try:
        google_usr = os.environ['GOOGLE_ACCOUNT']
        google_pwd = os.environ['GOOGLE_PASSWORD']
    except Exception as ex:
        # FIX: "str" + Exception raises TypeError in Python 3 -- convert first.
        print("GOOGLE_ACCOUNT and GOOGLE_PASSWORD not set. " + str(ex))
    try:
        treq = TrendReq(google_usr, google_pwd)
    except Exception as ex:
        print("Username or password not valid. " + str(ex))
    print("Connected")

    # LANGUAGES
    all_languages = [i.name for i in models.Language.objects.all()]
    _fetch_batches(treq, all_languages, populate_iot, populate_ior)
    print("Interest over time language completed.")

    # FRAMEWORKS
    print("Interest over time framework started.")
    all_fw = [i.name for i in models.LibraryOrFramework.objects.all()]
    _fetch_batches(treq, all_fw, populate_iot_fw, populate_ior_fw)
    print("Interest over time framework completed.")
import os, requests, time, operator
from pytrends.request import TrendReq
import plotly.express as px
import pymysql.cursors
from sqlalchemy import create_engine, exc

## TODO: Need to find fix so that I can find 5 related terms at a time (fewer requests)
## TODO: Focus on specific categories for more actionable insights?
## TODO: Decide if I want to keep the GEO for both functions as Canada
## TODO: Should look into "['link' 'value'] not found in axis" error during related topics lookup

# Only needs to run once - all requests use this session
# Timezone is 240 (could be -240 as well?)
pytrends = TrendReq(
    hl='en-US',
    tz=-240,
    retries=2,
    backoff_factor=0.2,
)

# Connect to the database
# NOTE(review): `password` must be defined earlier in this module -- it is not
# visible in this excerpt.
connection = pymysql.connect(host='localhost',
                             user='******',
                             password=password,
                             db='trends',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

# create sqlalchemy engine
engine = create_engine("mysql+pymysql://{user}:{pw}@localhost/{db}".format(
    user="******",
    pw=password,
    db="trends"))
def test_trending_searches(self):
    """trending_searches() must return a non-None result."""
    req = TrendReq()
    req.build_payload(kw_list=['pizza', 'bagel'])
    self.assertIsNotNone(req.trending_searches())
def test_interest_by_region(self):
    """interest_by_region() must return a non-None result."""
    req = TrendReq()
    req.build_payload(kw_list=['pizza', 'bagel'])
    self.assertIsNotNone(req.interest_by_region())
def test_suggestions(self):
    """suggestions() must return a non-None result for a known keyword."""
    req = TrendReq()
    req.build_payload(kw_list=['pizza', 'bagel'])
    self.assertIsNotNone(req.suggestions(keyword='pizza'))
def get(self, request, *args, **kwargs):
    """Render the top-trends page with one 'trending' entry per country.

    Google's hot-trends feed keys countries by a numeric id string; COUNTRIES
    maps that id to a display name and an ISO 3166 country code.
    """
    context = self.get_context_data()
    COUNTRIES = {
        "40": {"name": "South Africa", "postal": "ZA"},
        "15": {"name": "Germany", "postal": "DE"},
        "36": {"name": "Saudi Arabia", "postal": "SA"},
        "30": {"name": "Argentina", "postal": "AR"},
        "8": {"name": "Australia", "postal": "AU"},
        "44": {"name": "Austria", "postal": "AT"},
        "41": {"name": "Belgium", "postal": "BE"},
        "18": {"name": "Brazil", "postal": "BR"},
        "13": {"name": "Canada", "postal": "CA"},
        "38": {"name": "Chile", "postal": "CL"},
        "32": {"name": "Colombia", "postal": "CO"},
        "23": {"name": "South Korea", "postal": "KR"},
        "49": {"name": "Denmark", "postal": "DK"},
        "29": {"name": "Egypt", "postal": "EG"},
        "26": {"name": "Spain", "postal": "ES"},
        "1": {"name": "United States", "postal": "US"},
        "50": {"name": "Finland", "postal": "FI"},
        "16": {"name": "France", "postal": "FR"},
        "48": {"name": "Greece", "postal": "GR"},
        "10": {"name": "Hong Kong", "postal": "HK"},
        "45": {"name": "Hungary", "postal": "HU"},
        "3": {"name": "India", "postal": "IN"},
        "19": {"name": "Indonesia", "postal": "ID"},
        "6": {"name": "Israel", "postal": "IL"},
        "27": {"name": "Italy", "postal": "IT"},
        "4": {"name": "Japan", "postal": "JP"},
        "37": {"name": "Kenya", "postal": "KE"},
        "34": {"name": "Malaysia", "postal": "MY"},
        "21": {"name": "Mexico", "postal": "MX"},
        "52": {"name": "Nigeria", "postal": "NG"},
        "51": {"name": "Norway", "postal": "NO"},
        "17": {"name": "Netherlands", "postal": "NL"},
        "25": {"name": "Philippines", "postal": "PH"},
        "31": {"name": "Poland", "postal": "PL"},
        "47": {"name": "Portugal", "postal": "PT"},
        "43": {"name": "Czech Republic", "postal": "CZ"},
        "39": {"name": "Romania", "postal": "RO"},
        # BUG FIX: was "United Kingdon".
        "9": {"name": "United Kingdom", "postal": "GB"},
        "14": {"name": "Russia", "postal": "RU"},
        "5": {"name": "Singapore", "postal": "SG"},
        "42": {"name": "Sweden", "postal": "SE"},
        # BUG FIX: the ISO code for China is CN ("CH" is Switzerland).
        "46": {"name": "China", "postal": "CN"},
        "12": {"name": "Taiwan", "postal": "TW"},
        "33": {"name": "Thailand", "postal": "TH"},
        "24": {"name": "Turkey", "postal": "TR"},
        "35": {"name": "Ukraine", "postal": "UA"},
        "28": {"name": "Vietnam", "postal": "VN"},
    }
    pytrends = TrendReq(settings.GOOGLE_ACCT, settings.GOOGLE_PW)
    hottrendslist = pytrends.hottrends({'geo': 'World'})
    # Attach the current trending searches to each country entry.
    for postal, trend in hottrendslist.items():
        COUNTRIES[postal]['trending'] = trend
    context['countries'] = COUNTRIES
    return render(request, 'toptrends/main.html', context)
def test_top_charts(self):
    """top_charts() must return a non-None result for a known chart id/date."""
    req = TrendReq()
    req.build_payload(kw_list=['pizza', 'bagel'])
    self.assertIsNotNone(req.top_charts(cid='actors', date=201611))
def test_trending_searches(self):
    """trending_searches() must return a non-None result for region 'p1'."""
    req = TrendReq()
    req.build_payload(kw_list=['pizza', 'bagel'])
    self.assertIsNotNone(req.trending_searches(pn='p1'))
def test_related_queries(self):
    """related_queries() must return a non-None result."""
    req = TrendReq()
    req.build_payload(kw_list=['pizza', 'bagel'])
    self.assertIsNotNone(req.related_queries())
def test_interest_over_time_froogle(self):
    """interest_over_time() must work with the 'froogle' (shopping) property."""
    req = TrendReq()
    req.build_payload(kw_list=['pizza', 'bagel'], gprop='froogle')
    self.assertIsNotNone(req.interest_over_time())
# The maximum for a timeframe for which we get daily data is 270. # Therefore we could go back 269 days. However, since there might # be issues when rescaling, e.g. zero entries, we should have an # overlap that does not consist of only one period. Therefore, # I limit the step size to 250. This leaves 19 periods for overlap. maxstep = 269 overlap = 40 step = maxstep - overlap + 1 kw_list = [' '] start_date = datetime(2011, 12, 9).date() ## FIRST RUN ## # Login to Google. Only need to run this once, the rest of requests will use the same session. pytrend = TrendReq() # Run the first time (if we want to start from today, otherwise we need to ask for an end_date as well today = datetime.today().date() old_date = today # Go back in time new_date = today - timedelta(days=step) # Create new timeframe for which we download data timeframe = new_date.strftime('%Y-%m-%d')+' '+old_date.strftime('%Y-%m-%d') pytrend.build_payload(kw_list=kw_list, timeframe = timeframe) interest_over_time_df = pytrend.interest_over_time() ## RUN ITERATIONS
def test_interest_by_region_city_resolution(self):
    """interest_by_region() must accept city-level resolution."""
    req = TrendReq()
    req.build_payload(kw_list=['pizza', 'bagel'])
    self.assertIsNotNone(req.interest_by_region(resolution='CITY'))
def test_build_payload(self):
    """Should return the widgets to get data"""
    req = TrendReq()
    req.build_payload(kw_list=['pizza', 'bagel'])
    self.assertIsNotNone(req.token_payload)
# Column header: "What" + 20 rank columns + Date + Hour.
heado = [str(x).strip() for x in range(0, 20)]
heado.insert(0, "What")
heado.append("Date")
heado.append("Hour")

csv_path = 'data/Aus_google_trends.csv'
old_df = pd.read_csv(csv_path)
# Skip the fetch if this hour has already been recorded for today.
# NOTE(review): bris_reverse_date / bris_hour are defined outside this excerpt.
current_day = old_df[old_df['Date'] == bris_reverse_date]
if int(bris_hour) in current_day['Hour'].values.tolist():
    pass
    print("Nup")
else:
    pytrend = TrendReq(hl='en-US', tz=360)
    df = pytrend.trending_searches(pn='australia')
    df = df.rename(columns={0: "Google trending searches"})
    # One row per snapshot: transpose the ranked list and stamp date/hour.
    melted = df.T.reset_index()
    melted['Date'] = bris_reverse_date
    melted['Hour'] = bris_hour
    melted.columns = heado
    # FIX: DataFrame.append was deprecated and removed in pandas 2.0;
    # pd.concat is the equivalent replacement.
    old_df = pd.concat([old_df, melted])
    print(old_df)
    with open(csv_path, "w") as f:
        old_df.to_csv(f, index=False)
def graph(request):
    """Render a Google Trends graph for one keyword or two comma-separated ones.

    Query params: ``q`` = keyword(s); ``y`` = 'year' | 'month' | anything else
    (falls back to the last 7 days). Redirects to the index when ``q`` is empty.
    """
    words = request.GET.get('q')
    if not words:
        return redirect('tot:index')
    date = request.GET.get('y')
    a = ','
    pytrends = TrendReq()
    #pytrends = TrendReq(hl='ko', tz=540)
    if a not in words:
        # Single-keyword request.
        if request.GET.get('y') == 'year':
            date = 'today 12-m'
        elif request.GET.get('y') == 'month':
            date = 'today 1-m'
        else:
            date = 'now 7-d'
        word1 = words
        # BUG FIX: build_payload expects a list of keywords; passing the bare
        # string made pytrends treat every character as a separate keyword.
        list1 = [word1]
        print(f'{date}')
        pytrends.build_payload(list1, cat=0, timeframe=f'{date}', geo='', gprop='')
        print('2222222222222222')
        value = pytrends.interest_over_time()
        del value['isPartial']
        value = value.reset_index()
        value2 = value.to_json(force_ascii=False, orient='split', date_format='iso', date_unit='s')
        abc = json.loads(value2)
        # Build the chart point list: [{label, y, link}, ...].
        ab = []
        for a in abc['data']:
            k = {}
            h = datetime.strptime(a[0], '%Y-%m-%dT%H:%M:%SZ')
            h2 = h.strftime('%Y-%m-%d %H:%M:%S')
            k['label'] = h2
            k['y'] = a[1]
            k['link'] = '/anal'
            ab.append(k)
        context = {'ab': ab, 'word1': word1}
        return render(request, 'tot/graph.html', context)
    elif a in words:
        # Two-keyword comparison request.
        words = words.split(',')
        word1 = words[0]
        word2 = words[1]
        if request.GET.get('y') == 'year':
            date = 'today 12-m'
        elif request.GET.get('y') == 'month':
            date = 'today 1-m'
        else:
            date = 'now 7-d'
        list1 = [word1, word2]
        pytrends.build_payload(list1, cat=0, timeframe=f'{date}', geo='', gprop='')
        value = pytrends.interest_over_time()
        del value['isPartial']
        value = value.reset_index()
        value2 = value.to_json(force_ascii=False, orient='split', date_format='iso', date_unit='s')
        abc = json.loads(value2)
        # Two parallel point lists, one per keyword.
        ab = []
        cd = []
        for a in abc['data']:
            k = {}
            z = {}
            h = datetime.strptime(a[0], '%Y-%m-%dT%H:%M:%SZ')
            h2 = h.strftime('%Y-%m-%d %H:%M:%S')
            k['label'] = h2
            k['y'] = a[1]
            k['link'] = '/anal'
            ab.append(k)
            z['label'] = h2
            z['y'] = a[2]
            z['link'] = '/anal'
            cd.append(z)
        context = {'ab': ab, 'cd': cd, 'word1': word1, 'word2': word2}
        return render(request, 'tot/graph.html', context)
def main():
    """Download Steelcase Google Trends data and dump each table to CSV.

    Fetches overall interest, hourly interest for a fixed window, regional
    interest, and rising/top related topics.
    """
    # Set up api wrapper
    pytrends = TrendReq(hl='en-US', tz=360)
    # Limit of 5 keywords
    kw_list = ["Steelcase"]
    # Build pipeline
    pytrends.build_payload(kw_list, cat=0, timeframe='all', geo='', gprop='')

    # Get overall interest over the entire timeline
    interestDF = pytrends.interest_over_time()
    interestDF.to_csv(
        "C:\\Users\\gwang\\Documents\\01 ADS Projects\\GoogleTrends5YearInterest_test.csv",
        index=True)
    print(interestDF.head())
    print()

    # Sleep 60 prevents you from being rate limited
    # Get hourly interest over the time set
    hourlyDF = pytrends.get_historical_interest(kw_list,
                                                year_start=2019, month_start=7,
                                                day_start=1, hour_start=0,
                                                year_end=2019, month_end=7,
                                                day_end=1, hour_end=1,
                                                cat=0, geo='', gprop='', sleep=60)
    hourlyDF.to_csv(
        "C:\\Users\\gwang\\Documents\\01 ADS Projects\\GoogleTrends5YearHourlyInterest_test.csv",
        index=True)
    print(hourlyDF.head())
    print()

    # Get regional interest across the world
    # Can switch to state or city specific
    regionDF = pytrends.interest_by_region(resolution='COUNTRY',
                                           inc_low_vol=True,
                                           inc_geo_code=False)
    regionDF.to_csv(
        "C:\\Users\\gwang\\Documents\\01 ADS Projects\\GoogleTrendsRegionInterest_test.csv",
        index=True)
    print(regionDF.head())
    print()

    # PERF FIX: fetch related topics once instead of issuing two identical
    # network requests.
    related = pytrends.related_topics().get('Steelcase')

    # Get rising related topics
    risingDF = related.get('rising')
    risingDF.to_csv(
        "C:\\Users\\gwang\\Documents\\01 ADS Projects\\GoogleTrendsRisingRelated_test.csv",
        index=True)

    # Get top related topics
    topDF = related.get('top')
    topDF.to_csv(
        "C:\\Users\\gwang\\Documents\\01 ADS Projects\\GoogleTrendsTopRelated_test.csv",
        index=True)
    print(risingDF.head())
    print()
    print(topDF.head())
#from pandas.io.json.json_normalize import nested_to_record #!-*- coding: utf8 -*- import pandas as pd from pytrends.request import TrendReq import matplotlib.pyplot as plt import numpy as np from datetime import datetime '''pytrends = TrendReq() pytrends.build_payload(['suicide','自杀'],cat=0,timeframe='2003-01-01 2015-12-30',geo='',gprop='') print(pytrends.interest_over_time())''' pytrends = TrendReq() kw_list = ["suicide", "自杀", "自殺"] class google_trends: def collect(self, period, timestr): result = [0, 0, 0] for i, word in enumerate(kw_list): pytrends.build_payload([word], cat=0, timeframe=period, geo='HK', gprop='') df = pytrends.interest_over_time() print(df) result[i] = df[word].values.reshape(-1, 1).tolist() print(result[i]) df.to_csv('./test%s.%s.csv' % (timestr, word), index=True) data = pd.read_csv('./test%s.%s.csv' % (timestr, word)) x = data.iloc[:, 0] y = df.iloc[:, 0] / 100
# Query configuration.
# NOTE(review): args, username and password are defined outside this excerpt.
language = 'en-US'
timezone = 360
query_terms = args.terms

# time span for search
# times = 'YYYY-MM-DD YYYY-MM-DD', where the first substring is the start
# date and the second substring is the end date
times = " ".join([args.start, args.end])

# geographic details
location = 'US'

# establish connection
# NOTE(review): positional username/password arguments belong to the old
# pytrends API -- current releases dropped Google-account login.
query = TrendReq(username, password, hl=language, tz=timezone,
                 custom_useragent=None)

# query setup
query.build_payload(query_terms, cat=0, timeframe=times, geo='US', gprop='')

# request to pandas table
data = query.interest_over_time()
print(data.tail())
data.to_csv('../data/google/{}.{}.csv'.format('_'.join(query_terms), times))
from pytrends.request import TrendReq
import matplotlib.pyplot as plt
import os

# Keywords to compare and the search window ("today 5-y" = last five years).
keyword1 = "apple iphone"
keyword2 = "samsung galaxy"
period = "today 5-y"

# Open a Google Trends session and download interest over time.
trend_client = TrendReq()
trend_client.build_payload(kw_list=[keyword1, keyword2], timeframe=period)  # kw_list: max 5 keywords
interest_df = trend_client.interest_over_time()

# Plot both series on a single chart.
plt.style.use("ggplot")
plt.figure(figsize=(14, 5))
interest_df[keyword1].plot()
interest_df[keyword2].plot()
plt.title("Google Trends: %s vs. %s" % (keyword1, keyword2), size=15)
plt.legend(loc="best")

# Save the figure under ./output, then display it.
working_dir = os.getcwd()
output_filepath = os.path.join(
    working_dir, "output", 'google_trend_%s_vs_%s.png' % (keyword1, keyword2))
plt.savefig(output_filepath, dpi=300)
plt.show()
from pytrends.request import TrendReq

# Compare interest in three "asuka" search terms over the last day.
kw_list = ["asuka", "asuka langley", "asuka evangelion"]
trends = TrendReq(hl='en-US', tz=360, timeout=(10, 25))
trends.build_payload(kw_list, cat=0, timeframe='now 1-d', geo='', gprop='')
df = trends.interest_over_time()
# BUG FIX: the original printed the TrendReq session object and left the
# downloaded DataFrame unused; print the data instead.
print(df)
from pytrends.request import TrendReq
from addon import *

# Google Trends session (US English UI, tz offset 360 minutes).
pytrends = TrendReq(hl='en-US', tz=360)

# Interest in "covid symptoms" from Feb through early Oct 2020, worldwide.
search_terms = ["covid symptoms"]
pytrends.build_payload(search_terms, cat=0,
                       timeframe='2020-02-01 2020-10-09', geo='', gprop='')
# print(interest_by_city(pytrends))
print(pytrends.interest_over_time())
print(pytrends.interest_by_region(resolution="COUNTRY"))
# Final batch of search keywords.
# NOTE(review): kw_1 .. kw_15 are defined earlier, outside this excerpt.
kw_16 = 'savings association'
kw_17 = 'deposit money order'
kw_18 = 'deposit check'
kw_19 = 'best bank accounts'
kw_20 = 'small business bank'

# Group the keywords accordingly (Google Trends allows at most 5 per payload)
kw_list_1 = [kw_1, kw_2, kw_3, kw_4, kw_5]
kw_list_2 = [kw_6, kw_7, kw_8, kw_9, kw_10]
kw_list_3 = [kw_11, kw_12, kw_13, kw_14, kw_15]
kw_list_4 = [kw_16, kw_17, kw_18, kw_19, kw_20]
kw_list = [kw_list_2, kw_list_3, kw_list_4]

# Create the master google trend keyword list using kw_list_1
pytrend = TrendReq(hl='en-US')
pytrend.build_payload(kw_list_1, cat=0, timeframe='all', geo='US', gprop='')
df_google = pytrend.interest_over_time()
# Collapse to annual means, indexed by calendar year.
df_google_annual = df_google.resample('A').mean()
df_google_annual.set_index(df_google_annual.index.year, inplace=True)

# Iterate through the remaining lists in kw_list and join accordingly
# NOTE(review): the join itself appears after this excerpt.
for item in kw_list:
    pytrend = TrendReq(hl='en-US')
    pytrend.build_payload(item, cat=0, timeframe='all', geo='US', gprop='')
from helpers import get_pretty_json_string, shell
from tgbich import run_tgbich
from ircbich import ircbich_init_and_loop

# Dispatch table: bot kind -> entry point.
functions = {'tg': run_tgbich, 'irc': ircbich_init_and_loop}

print(f"{__file__}, {__name__}: starting")

from multiprocessing import Process
import os

print(f"{__file__}, {__name__}: pytrends: processing Trend Requests")
# Retry until a TrendReq session can be created (transient network failures),
# sleeping between attempts; Ctrl-C still aborts.
# NOTE(review): `traceback` and `time` must be imported elsewhere in this file.
while True:
    try:
        pytrends = TrendReq(hl='ru-RU', tz=360)
        break
    except KeyboardInterrupt as e:
        raise e
    except:
        traceback.print_exc()
        TIME_TO_SLEEP_SECONDS = 1
        print("sleeping %s seconds" % str(TIME_TO_SLEEP_SECONDS))
        time.sleep(TIME_TO_SLEEP_SECONDS)
        continue
print(f"{__file__}, {__name__}: pytrends: completed.")


# launch processes
def launch_all():
    # NOTE(review): the body continues beyond this excerpt.
    print("processing configs")
def main():
    """Scrape trending keywords from several Japanese portals, pull Google
    Trends day-over-day deltas for every watched product keyword, persist the
    series, and mail users whose keywords are spiking."""
    # Downloaded Firebase service-account private key.
    cred = credentials.Certificate(
        "demand-forecast-by-hw-firebase-adminsdk-zbrw8-240f7687b3.json"
    )
    ############################################################################################################################
    # Authentication. Comment this out after the first run when re-running in
    # the same process; a single local run is fine.
    firebase_admin.initialize_app(cred)
    ############################################################################################################################

    # Headless-Chrome boilerplate.
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    driver = webdriver.Chrome('chromedriver', options=options)
    driver.implicitly_wait(10)

    ratios = []

    ############################################################
    # nifty ranking pages
    name = "http://search.nifty.com/shun/ranking.htm"
    driver.get(name)
    driver.implicitly_wait(10)
    html = driver.page_source.encode('utf-8')
    soup = BeautifulSoup(html, "html.parser")
    all_Ratio = soup.find_all("dd", class_="title")
    for ul_tag in all_Ratio:
        for span in ul_tag.find_all('a'):
            ratios.append(span.get_text())
    for i in range(5):
        name = "http://search.nifty.com/shun/ranking" + str(i + 1) + ".htm"
        driver.get(name)
        driver.implicitly_wait(10)
        html = driver.page_source.encode('utf-8')
        soup = BeautifulSoup(html, "html.parser")
        all_Ratio = soup.find_all("dd", class_="title")
        for ul_tag in all_Ratio:
            for span in ul_tag.find_all('a'):
                ratios.append(span.get_text())

    ###########################################################
    # d_menu search page
    name = "https://search.smt.docomo.ne.jp/result"
    driver.get(name)
    driver.implicitly_wait(10)
    html = driver.page_source.encode('utf-8')
    soup = BeautifulSoup(html, "html.parser")
    all_Ratio = soup.find_all("div", class_="swiper-slide")
    for ul_tag in all_Ratio:
        for span in ul_tag.find_all('a'):
            ratios.append(span.get_text())

    ###########################################################
    # twitter hot words
    name = "https://tr.twipple.jp/hotword/today.html"
    driver.get(name)
    driver.implicitly_wait(10)
    html = driver.page_source.encode('utf-8')
    soup = BeautifulSoup(html, "html.parser")
    all_Ratio = soup.find_all("div", class_="rankTtl")
    for ul_tag in all_Ratio:
        for span in ul_tag.find_all('a'):
            ratios.append(span.get_text())
    ###########################################################

    # BUG FIX: `driver.close` was missing the call parentheses, so the
    # browser window was never actually closed.
    driver.close()

    email, product = read_datebase()
    for index, word_list in enumerate(product, start=0):
        ###############################################################
        pytrends = TrendReq(hl='en-US', tz=360)
        google_maillist = []
        for word in word_list:
            # API
            kw_list = [word]
            print(kw_list)
            pytrends.build_payload(
                kw_list, cat=0, timeframe='today 1-m', geo='JP',
                gprop='')  # 1, 7 days or 1, 3, 12 months only
            df = pytrends.interest_over_time()
            # Build the per-word day-over-day delta series for the database.
            list_date = df.index.to_list()
            list_sample = []
            dff = df.diff(1)
            list_sample = dff[str(word)].to_list()
            a = list_sample[-1:]
            # Flag the keyword for mailing when the latest jump is >= 40.
            if float(*a) >= 40:
                google_maillist.append(word)
            send_datebase(str(word), list_sample[1:])
        state = [list_date[1], *list_date[-1:]]
        send_datetime(state)
        ###########################################################
        # Also mail any watched word that shows up in the scraped rankings.
        mail_word = []
        for word in word_list:
            for ratio in ratios:
                if word in ratio:
                    mail_word.append(word)
        mail_word += google_maillist
        mail = numpy.unique(mail_word).tolist()
        print(str(email[index]) + ":" + str(mail))
        if (str(mail) != str("[]")):
            send_mail(str(email[index]), str(mail))
import collect # import local collect file from pytrends.request import TrendReq if __name__ == "__main__": # Only need to run this once, the rest of requests will use the same session. pytrend = TrendReq() kw_list = ['aws', 'azure'] # Create payload and capture API tokens. Only needed for interest_over_time(), interest_by_region() & related_queries() pytrend.build_payload(kw_list=kw_list) interest_over_time_df = collect.get_interest_over_time(pytrend, kw_list) interest_by_region_df = collect.get_interest_by_region(pytrend, kw_list) related_queries_dict = collect.get_related_queries(pytrend, kw_list) trending_searches_df = collect.get_trending_searches(pytrend) today_searches_df = collect.get_today_searches() year = 2018 geo = 'DE' # GLOBAL or two letter country shortcut (e.g. DE, FR, US) language = 'en-US' timezone = 0 top_charts_df = collect.get_top_charts(year, geo, language, timezone) kw = 'azure' suggestion_dict = collect.get_suggestions(kw) historical_interest_df = collect.get_historical_interest(pytrend, kw_list)
from IPython import get_ipython
get_ipython().magic('reset -sf')

import datetime
from datetime import datetime as dt
from datetime import date
import os
import pathlib
import colorlover as cl
import plotly.graph_objs as go
import chart_studio.plotly as py
import plotly.express as px
import pandas as pd
from pytrends.request import TrendReq

APP_PATH = str(pathlib.Path(__file__).parent.resolve())

# Shared Google Trends session with basic retry/backoff.
pytrends = TrendReq(hl='en-US', tz=360, retries=2, backoff_factor=0.1)

#%% [markdown]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Bat soup theory~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
today=datetime.date(2020,4,26)
search_time='2020-01-01 '+str(today)
searches_bat=[
    'bat soup',
    'coronavirus bat soup',
    'china bat soup',
    'chinese bat soup',
    'wuhan bat soup',
    'bat soup virus',
]
# Split the keyword list into single-item groups (payloads are built one
# keyword at a time further below).
groupkeywords = list(zip(*[iter(searches_bat)]*1))
groupkeywords = [list(x) for x in groupkeywords]

# Download search interest of bat key words
def get_pytrend():
    """Return a pytrends session for scraping Google Trends data."""
    # hl = host language; tz = timezone offset in minutes (US CST is 360).
    session = TrendReq(hl='en-US', tz=360)
    return session
import pandas as pd
from pytrends.request import TrendReq

final = pd.read_csv('./csv/14_final.csv')

# date=2015-02-01%202015-03-01&geo=KR&q=%ED%82%B9%EC%8A%A4%EB%A7%A8
base = 'https://trends.google.co.kr/trends/explore?cat=34&'
pytrend = TrendReq()

# For every movie, query Korean Google Trends (cat 34) for the movie name plus
# the generic keyword, between the release date and day14_date.
for index, row in final.iterrows():
    movie_name = getattr(row, 'movie_name')
    nation = getattr(row, 'nation')
    distribution = getattr(row, 'distribution')
    director_average = getattr(row, 'director_average')
    actor_sum = getattr(row, 'actor_sum')
    rate = getattr(row, 'rate')
    genre = getattr(row, 'genre')
    release_date = getattr(row, 'release_date')
    day1_screen = getattr(row, 'day1_screen')
    day1_audience = getattr(row, 'day1_audience')
    # NOTE(review): the day14_* variables read the day15_* columns -- confirm
    # which offset is intended.
    day14_date = getattr(row, 'day15_date')
    day14_audience = getattr(row, 'day15_audience')
    pytrend.build_payload(kw_list=[movie_name, '영화'],
                          timeframe='%s %s' % (release_date, day14_date),
                          geo='KR', cat='34')
    # NOTE(review): the result is bound but unused within this excerpt.
    interest_time_over_time = pytrend.interest_over_time()
def test_interest_over_time_bad_gprop(self):
    """A gprop value outside the allowed set must raise ValueError."""
    req = TrendReq()
    with self.assertRaises(ValueError):
        req.build_payload(kw_list=['pizza', 'bagel'], gprop=' ')
def getgoogledata():
    """Scrape Google Trends for political-risk keywords per country and
    push weighted category indices (SBPRI) into the Data model.

    For each language group, each category, and each country, the function
    downloads interest-over-time for every keyword, seasonally corrects and
    rescales each series to 0-100, averages them into a per-country mean,
    combines category means with fixed weights, and finally replaces all
    rows in the Data table with the fresh values.
    """
    languages = ['Eng', 'Ger', 'Esp']
    categories = ['Overview', 'Regulation', 'Sanctions', 'Situation']
    # Country-name -> ISO code, grouped by dominant search language.
    dictctry = {
        'Eng': {
            'Canada': "CA", 'Australia': "AU", 'India': "IN",
            'Pakistan': "PK", 'Ireland': "IE", 'United States': "US",
            'New Zealand': "NZ", 'South Africa': "ZA", 'Singapore': "SG",
            "Sierra Leone": "SL", "Liberia": "LR", "Ghana": "GH",
            "Nigeria": "NG", "Cameroon": "CM", "Ethiopia": "ET",
            "Uganda": "UG", "Rwanda": "RW", "Kenya": "KE",
            "Tanzania": "TZ", "Malawi": "MW", "Zambia": "ZM",
            "Zimbabwe": "ZW", "Botswana": "BW", "United Kingdom": "GB",
            "Sudan": "SD", "South Sudan": "SS", "Nambia": "NA"
        },
        #https://de.m.wikipedia.org/wiki/Datei:Official_languages_in_Africa.svg
        'Ger': {
            'Switzerland': "CH", 'Austria': "AT", 'Germany': "DE"
        },
        'Esp': {
            'Bolivia': "BO", 'Uruguay': "UY", 'Paraguay': "PY",
            'Venezuela': "VE", 'Colombia': "CO", 'Ecuador': "EC",
            'Peru': "PE", 'Chile': "CL", 'Argentina': "AR",
            'Mexico': "MX", 'Spain': 'ES'
        }
    }
    # Flat list of all country names across languages.
    ctrylist = []
    for lang in languages:
        ctrylist += list(dictctry[lang])
    # Category weights; the SBPRI is later divided by their sum (94).
    weights = {
        'Political Regulation': 16,
        'Political Sanctions': 33,
        'Political Situation': 45
    }
    # create dictionary with relevant keywords
    # NOTE(review): the 'Corruption' key matches none of the three tpc
    # branches below, so its scrape results appear to be discarded --
    # confirm whether it is still needed.
    kw_dict = {
        'Eng': {
            'Political Regulation': [
                "bureaucracy", "Environmental protection", "Regulations",
                "taxes", "property rights", 'expropriation'
            ],
            'Political Sanctions': [
                "exchange", "foreign investment", "quotas", 'restrictions',
                'subsidies', 'tariffs', "transferability", "sanctions"
            ],
            'Political Situation': [
                "central bank", "corruption", "instability", "judiciary",
                "nationalization", "protectionism", "revolt",
                'social conflict', "strike", "terrorism", "war"
            ],
            'Corruption': ['corruption']
        },
        'Ger': {
            'Political Regulation': [
                "Bürokratie", "Umweltschutz", "Regulierung", "Steuern",
                "Urheberrecht", 'Enteignung'
            ],
            'Political Sanctions': [
                "Transaktionen", "foreign investment", "Einfuhrzoll",
                'Beschränkungen', 'Subventionen', "Sanktionen"
            ],
            'Political Situation': [
                "Zentralbank", "Korruption", "Instabilität", "Justiz",
                "Nationalismus", "Protektionismus", "Aufstände",
                'Staatsgewalt', "Streik", "Terrorismus", "Krieg"
            ],
            'Corruption': ['Korruption']
        },
        'Esp': {
            'Political Regulation': [
                "burocracia", "protección del medio ambiente", "reglamento",
                "impuesto", "regulaciones"
            ],
            'Political Sanctions': [
                "sanciones", "restricciones", "convertibilidad", "cuotas",
                "subsidio", "tarifas", 'cambiario', 'inversion extranjera',
                'expropiación', "derecho de propiedad"
            ],
            'Political Situation': [
                "judicial", "situación política", "corrupcion",
                "conflicto social", "revuelta", "inestabilidad",
                "nacionalismo", 'gobierno militar', "proteccionismo",
                "huelga", "terrorismo", "guerra", "banco central"
            ],
            'Corruption': ['corrupcion']
        }
    }
    # start pytrend and create DataFrame for all country data
    pytrends = TrendReq(hl='en-EN', tz=360, timeout=(100))
    # dry-run request: only used to obtain a correctly-shaped date index
    # for initialising the weighted dataframes below.
    pytrends.build_payload(
        ['bureaucracy'],
        cat=0,
        timeframe=
        f'2006-01-01 {datetime.datetime.today().strftime("%Y-%m-%d")}',
        geo='CA',
        gprop='')  #build data extraction references
    dry_df = pytrends.interest_over_time()
    # NOTE(review): pytrends normally returns a *boolean* isPartial column;
    # comparing to the string 'True' likely selects no rows -- confirm.
    filt = dry_df[
        'isPartial'] == 'True'  # create filter to get isPartial column
    dry_df.drop(
        index=dry_df[filt].index,
        inplace=True)  # drop all indexes where values are inPartial = True
    dry_df.drop(columns=['isPartial'], inplace=True)  # drop column isPartial
    all_ctys_df = pd.DataFrame(columns=[list(
        dictctry[languages[0]])[0]])  #grab first country name for first column
    weighted_sbpri_df = pd.DataFrame(
        columns=ctrylist)  # create dataframe for weigthed values
    weighted_regulation_df = pd.DataFrame(
        columns=ctrylist)  # create dataframe for weigthed values
    weighted_situation_df = pd.DataFrame(columns=ctrylist)
    weighted_sanctions_df = pd.DataFrame(columns=ctrylist)
    # map zero values to weighted sbpri - this ensures that we can add the
    # weighted values of each category (+= below needs numeric starts).
    weighted_sbpri_df[ctrylist[0]] = dry_df['bureaucracy'] * 0
    weighted_sbpri_df = weighted_sbpri_df.fillna(0)
    #first loop over Languages
    for lang in languages:
        #Second loop over Categories and Respective Terms for that languages
        for tpc, terms in kw_dict[lang].items():
            all_terms_df = pd.DataFrame(columns=[terms[0]])
            # Third run Pytrend per Country in that language with the respective categories and terms
            for country, abbreviation in dictctry[lang].items():
                whole_df = pd.DataFrame(columns=[terms[0]
                                                 ])  # create empty dataframe
                # if country not in ['New Zealand', 'Ireland', 'Bolivia', 'Uruguay', 'Paraguay']: #countries where search terms have no data
                for i in terms:  #loop for data collection from google for each individual word in kw_list
                    # NOTE(review): bare except silently skips any failed
                    # keyword (rate limits included) -- best effort by design.
                    try:
                        pytrends.build_payload(
                            [i],
                            cat=0,
                            timeframe=
                            f'2006-01-01 {datetime.datetime.today().strftime("%Y-%m-%d")}',
                            geo=abbreviation,
                            gprop='')  #build data extraction references
                        df = pytrends.interest_over_time(
                        )  #create initial dataframe
                        # filt = df['isPartial'] == 'True' # create filter to get isPartial column
                        # df.drop(index=df[filt].index, inplace=True) # drop all indexes where values are inPartial = True
                        df.drop(columns=['isPartial'],
                                inplace=True)  # drop column isPartial
                        whole_df[i] = df[i]  #add column to whole_df dataframe
                        # random pause to stay under Google's request limits
                        time.sleep(random.uniform(2, 5))
                        print(country + ' / ' + i)
                    except:
                        print("error " + country + ' / ' + i)
                        pass
                # After Thrid Loop Correct data Per Country
                # seasonal correction to each search term since words have different seasonality (trabjar) searched more beginning of year other words much less in the beginning
                # NOTE: delta_df aliases whole_df here; the in-place calls
                # below mutate both names until delta_df is rebound.
                delta_df = whole_df
                delta_df.dropna(axis=1, inplace=True)
                # replace exact zeros so the multiplicative decomposition
                # and the later division never hit 0.
                delta_df.replace(0, 0.1, inplace=True)
                for i in delta_df.columns:
                    result_mul = seasonal_decompose(delta_df[i],
                                                    model='multiplicative',
                                                    extrapolate_trend='freq')
                    delta_df[i] = (delta_df[i] / result_mul.seasonal)
                # create change deltas for search term !! check if this may ruin data !!
                # min-max rescale every deseasonalised series to 0-100.
                delta_df = delta_df - delta_df.min()
                delta_df = delta_df / delta_df.max() * 100
                # set delta_df which is delta per word and seasonally corrected as whole_df
                whole_df = delta_df
                # create mean column for every country
                x = whole_df[terms[
                    0]] * 0  # create dataframe with same amount of rows as whole_df and 0 values
                for i in whole_df.columns:
                    x += whole_df[i]
                    if tpc == list(kw_dict[lang])[0]:
                        all_terms_df[i] = whole_df[i]
                x = x / len(whole_df.columns)
                whole_df['Mean'] = x
                # After Data correcting and Mean creation add Categories to weighted df with the necessary weights
                if tpc == list(kw_dict[lang])[0]:
                    weighted_sbpri_df[country] += weights[list(
                        kw_dict[lang])[0]] * whole_df['Mean']
                    weighted_regulation_df[country] = whole_df['Mean']
                if tpc == list(kw_dict[lang])[1]:
                    weighted_sbpri_df[country] += weights[list(
                        kw_dict[lang])[1]] * whole_df['Mean']
                    weighted_sanctions_df[country] = whole_df['Mean']
                if tpc == list(kw_dict[lang])[2]:
                    weighted_sbpri_df[country] += weights[list(
                        kw_dict[lang])[2]] * whole_df['Mean']
                    weighted_situation_df[country] = whole_df['Mean']
    # normalise the weighted sum by the total weight (16+33+45 = 94).
    weighted_sbpri_df = weighted_sbpri_df.applymap(
        lambda x: x / sum(weights.values()))
    # push new data to database (Data is the ORM model defined elsewhere)
    Data.objects.all().delete()  # delete data in database
    for country in weighted_sbpri_df:
        for row in weighted_sbpri_df.index:
            Data.objects.create(date=row,
                                country=country,
                                category='SBPRI',
                                value=weighted_sbpri_df[country][row])
    for country in weighted_regulation_df:
        for row in weighted_regulation_df.index:
            Data.objects.create(date=row,
                                country=country,
                                category=categories[1],
                                value=weighted_regulation_df[country][row])
    for country in weighted_sanctions_df:
        for row in weighted_sanctions_df.index:
            Data.objects.create(date=row,
                                country=country,
                                category=categories[2],
                                value=weighted_sanctions_df[country][row])
    for country in weighted_situation_df:
        for row in weighted_situation_df.index:
            Data.objects.create(date=row,
                                country=country,
                                category=categories[3],
                                value=weighted_situation_df[country][row])
from pytrends.request import TrendReq

# Google Trends client: US-English interface, tz offset 360 min (US CST).
pytrends = TrendReq(hl='en-US', tz=360)
kw_list = ["Ripple"]
# Last 7 days of search interest for "Ripple": worldwide (geo=''),
# web search (gprop=''), no category filter (cat=0).
pytrends.build_payload(kw_list, cat=0, timeframe='now 7-d', geo='', gprop='')
df = pytrends.interest_over_time()
# Line plot via pandas (assumes a matplotlib backend is set up -- TODO confirm).
df['Ripple'].plot(kind='line')
# Credentials are masked in this copy.
google_username = "******"
google_password = "******"
path = ""

# Timestamp prefix for output CSV file names, e.g. '200426_1530_'.
today = datetime.datetime.today().strftime('%y%m%d_%H%M_')

# Indian state/territory region codes (declared but not used below --
# presumably for a later per-region loop).
in_regions = ["IN-AN", "IN-AP", "IN-AS", "IN-BR", "IN-CH", "IN-CT", "IN-DL",
              "IN-GA", "IN-GJ", "IN-HP", "IN-HR", "IN-JH", "IN-JK", "IN-KA",
              "IN-KL", "IN-MH", "IN-MN", "IN-MP", "IN-OR", "IN-PB", "IN-PY",
              "IN-RJ", "IN-TG", "IN-TN", "IN-UP", "IN-UT", "IN-WB"]

# connect to Google
# NOTE(review): TrendReq(username, password) and pytrend.trend(...) are the
# pytrends pre-4.0 API, removed in current releases -- confirm the pinned
# pytrends version before running.
pytrend = TrendReq(google_username, google_password)

# India for MRI
trend_payload = {'q': 'mri, sonography, ultrasound', 'geo': "IN"}
print(trend_payload)
trend = pytrend.trend(trend_payload, return_type = 'dataframe')
# pause between requests to avoid rate limiting
time.sleep(random.uniform(10, 15))
file_name = "data/" + today + "mri_IN_.csv"
trend.to_csv(file_name)

# India for xray
trend_payload = {'q': 'xray, x-ray, sonography, ultrasound', 'geo': "IN"}
print(trend_payload)
trend = pytrend.trend(trend_payload, return_type = 'dataframe')
time.sleep(random.uniform(10, 15))
from pytrends.request import TrendReq #connect to Mongo DB Server # client =MongoClient("mongodb://*****:*****@sdgp1-fmfys.mongodb.net/whatIsSearchedToday?authSource=admin&replicaSet=sdgp1-shard-0&readPreference=primary&appname=MongoDB%20Compass%20Community&ssl=true' ) db = client.test #test connection if connected #establishing connection with google trends try: pytrend = TrendReq() except: pytrend = TrendReq() clientdatabaseName = 'whatIsSearchedToday' today_searches_df = pytrend.today_searches() db = client[ clientdatabaseName] #open database if not available will be created db.whatissearchedtodays.drop() #delete collection to have fresh collection whatIsSearchedTodayCollection = db[ 'whatissearchedtodays'] #new collection for what is searched today df = pd.DataFrame(today_searches_df) try: df = df.head(6) except: print("Less Than 6 values")
def get_google_trends(self, kw_list, trdays=250, overlap=100,
                      cat=0, geo='', tz=360, gprop='', hl='en-US',
                      sleeptime=1, isPartial_col=False,
                      from_start=False, scale_cols=True):
    """Retrieve daily google trends data for a list of search terms

    Daily resolution is only available for short timeframes, so longer
    ranges are fetched as overlapping windows and stitched together by
    rescaling each new window onto the previous one.

    Parameters
    ----------
    kw_list : list of search terms (max 5) - see pyTrends for more details
    trdays : the number of days to pull data for in a search
        (the max is around 270, though the website seems to indicate 90)
    overlap : the number of overlapped days when stitching two searches
        together
    cat : category to narrow results - see pyTrends for more details
    geo : two letter country abbreviation (e.g 'US', 'UK')
        default is '', which returns global results - see pyTrends for
        more details
    tz : timezone offset (default is 360, which corresponds to US CST -
        see pyTrends for more details)
    gprop : filter results to specific google property; available options
        are 'images', 'news', 'youtube' or 'froogle'; default is '',
        which refers to web searches - see pyTrends for more details
    hl : language (e.g. 'en-US' (default), 'es') - see pyTrends for more
        details
    sleeptime : when stitching multiple searches, this sets the period
        between each
    isPartial_col : keep the isPartial column when True
        (default is False, i.e. the column is removed)
    from_start : when stitching multiple results, this determines whether
        searches are combined going forward or backwards in time
        (default is False, meaning searches are stitched with the most
        recent first)
    scale_cols : google trend searches traditionally return scores between
        0 and 100; stitching could produce values greater than 100; by
        setting this to True (default), the values will range between
        0 and 100

    Returns
    -------
    pandas Dataframe

    Notes
    -----
    This method is essentially a highly restricted wrapper for the
    pytrends package.
    Any issues/questions related to its use would probably be more
    likely resolved by consulting the pytrends github page
    https://github.com/GeneralMills/pytrends
    """
    # Validate stitching parameters up front.
    if len(kw_list)>5 or len(kw_list)==0:
        raise ValueError("The keyword list can contain at most 5 words")
    if trdays>270:
        raise ValueError("trdays must not exceed 270")
    if overlap>=trdays:
        raise ValueError("Overlap can't exceed search days")
    # Days each new window advances past the previous one.
    stich_overlap = trdays - overlap
    # self.from_date / self.to_date are 'YYYY-MM-DD' strings set elsewhere
    # on this object.
    from_date = datetime.datetime.strptime(self.from_date, '%Y-%m-%d')
    to_date = datetime.datetime.strptime(self.to_date, '%Y-%m-%d')
    n_days = (to_date - from_date).days
    # launch pytrends request
    _pytrends = TrendReq(hl=hl, tz=tz)
    # get the dates for each search: either one window covering the whole
    # range, or a sequence of overlapping 'start end' windows walking
    # backwards from to_date.
    if n_days <= trdays:
        trend_dates = [' '.join([self.from_date, self.to_date])]
    else:
        trend_dates = ['{} {}'.format(
            (to_date - datetime.timedelta(i+trdays)).strftime("%Y-%m-%d"),
            (to_date - datetime.timedelta(i)).strftime("%Y-%m-%d"))
            for i in range(0,n_days-trdays+stich_overlap, stich_overlap)]
    if from_start:
        trend_dates = trend_dates[::-1]
    # First window seeds the output; errors are returned as a one-row
    # error DataFrame rather than raised.
    try:
        _pytrends.build_payload(kw_list, cat=cat, timeframe=trend_dates[0],
                                geo=geo, gprop=gprop)
    except Exception as e:
        return pd.DataFrame({"error":e}, index=[0])
    output = _pytrends.interest_over_time().reset_index()
    if len(output)==0:
        return pd.DataFrame({"error":'search term returned no results (insufficient data)'}, index=[0])
    for date in trend_dates[1:]:
        time.sleep(sleeptime)
        try:
            _pytrends.build_payload(kw_list, cat=cat, timeframe=date,
                                    geo=geo, gprop=gprop)
        except Exception as e:
            return pd.DataFrame({"error":e}, index=[0])
        temp_trend = _pytrends.interest_over_time().reset_index()
        # Left-merge onto accumulated output so overlapping dates carry
        # both the new (_x) and already-stitched (_y) scores.
        temp_trend = temp_trend.merge(output, on="date", how="left")
        # it's ugly but we'll exploit the common column names
        # and then rename the underscore containing column names
        for kw in kw_list:
            # Mean ratio old/new over the overlap rescales the new window
            # onto the existing series (masked_invalid skips NaN/inf from
            # zero scores).
            norm_factor = np.ma.masked_invalid(temp_trend[kw+'_y']/temp_trend[kw+'_x']).mean()
            temp_trend[kw] = temp_trend[kw+'_x'] * norm_factor
        # Keep only the dates not already present in output (rows where
        # the merged _y columns are NaN).
        temp_trend = temp_trend[temp_trend.isnull().any(axis=1)]
        temp_trend['isPartial'] = temp_trend['isPartial_x']
        output = pd.concat([output, temp_trend[['date', 'isPartial'] + kw_list]], axis=0)
    # fix the column order (date, isPartial, then the keywords)
    output = output[['date', 'isPartial']+kw_list]
    if not isPartial_col:
        output = output.drop('isPartial', axis=1)
    output = output[output['date']>=self.from_date]
    if scale_cols:
        # the values in each column are relative to other columns
        # so we need to get the maximum value across the search columns
        max_val = float(output[kw_list].values.max())
        for col in kw_list:
            output[col] = 100.0*output[col]/max_val
    # self.ascending controls final sort direction; set elsewhere.
    output = output.sort_values('date',
                                ascending=self.ascending).reset_index(drop=True)
    return output
from pytrends.request import TrendReq # Login to Google. Only need to run this once, the rest of requests will use the same session. pytrend = TrendReq() # Create payload and capture API tokens. Only needed for interest_over_time(), interest_by_region() & related_queries() pytrend.build_payload(kw_list=['pizza', 'bagel']) # Interest Over Time interest_over_time_df = pytrend.interest_over_time() print(interest_over_time_df.head()) # Interest by Region interest_by_region_df = pytrend.interest_by_region() print(interest_by_region_df.head()) # Related Queries, returns a dictionary of dataframes related_queries_dict = pytrend.related_queries() print(related_queries_dict) # Get Google Hot Trends data trending_searches_df = pytrend.trending_searches() print(trending_searches_df.head()) # Get Google Top Charts top_charts_df = pytrend.top_charts(cid='actors', date=201611) print(top_charts_df.head()) # Get Google Keyword Suggestions suggestions_dict = pytrend.suggestions(keyword='pizza')