Пример #1
0
    def test_get_historical_interest(self):
        """Fetch hourly interest for each keyword separately and merge the columns.

        Every term is requested in its own call so that its popularity values
        are not scaled relative to the other terms; the per-term columns are
        then collected into the first term's DataFrame.
        """
        from pytrends.request import TrendReq

        # A single TrendReq instance is reused for every request below.
        pytrend = TrendReq()
        first_term, *remaining_terms = ['zuckerberg', 'facebook stock']

        # Identical time window and pause for every request.
        window = dict(year_start=2018, month_start=5, day_start=18,
                      year_end=2019, month_end=1, day_end=5,
                      sleep=1)

        df = pytrend.get_historical_interest([first_term], **window)

        for term in remaining_terms:
            term_frame = pytrend.get_historical_interest([term], **window)
            df[term] = term_frame[term]

        self.assertIsNotNone(df)
Пример #2
0
class Trends():
    """Decide whether a keyword is better served by a YouTube video or by text.

    On construction, hourly Google Trends interest is fetched twice for
    ``keyword`` -- once with ``gprop=''`` and once with ``gprop='youtube'`` --
    and the preference is computed immediately via :meth:`pref`.
    """

    def __init__(self, keyword):
        """Fetch both interest series and compute the preference.

        Args:
            keyword: Sequence of search terms passed to pytrends. Only
                ``keyword[0]`` is used for the comparison and content lookup.
        """
        self.keyword = keyword
        self.pyt = TrendReq(hl='en-US',
                            tz=360,
                            timeout=(10, 25),
                            retries=2,
                            backoff_factor=0.1)
        # Both requests share everything except the search property.
        window = dict(year_start=2018,
                      month_start=1,
                      day_start=1,
                      hour_start=0,
                      cat=0,
                      geo='',
                      sleep=0)
        self.web = self.pyt.get_historical_interest(self.keyword,
                                                    gprop='',
                                                    **window)
        self.you = self.pyt.get_historical_interest(self.keyword,
                                                    gprop='youtube',
                                                    **window)
        self.pref()

    def pref(self):
        """Set and return ``self.preference_video``.

        Video is preferred unless the mean ``gprop=''`` interest for
        ``keyword[0]`` is strictly greater than the mean YouTube interest.

        Returns:
            bool: ``True`` when video content is preferred.
        """
        term = self.keyword[0]
        web_mean = self.web.mean()[term]
        youtube_mean = self.you.mean()[term]
        self.preference_video = not (web_mean > youtube_mean)
        return self.preference_video

    def get_content(self):
        """Return a YouTube URL or a Wikipedia summary for ``keyword[0]``.

        Returns:
            str: ``"http://youtube.com"`` + the first search result's link
            when video is preferred; otherwise the English Wikipedia summary,
            or ``"No info found !"`` when the page does not exist.
        """
        term = self.keyword[0]

        if self.preference_video:
            # Local import keeps this block self-contained.
            import json

            print("Youtube Video")
            self.youtube_link = ys(term, max_results=5).to_json()
            # The search result is JSON text from a remote service: parse it
            # with json.loads instead of eval(), which would execute
            # arbitrary expressions and fails on null/true/false.
            self.link = json.loads(self.youtube_link)['videos'][0]['link']
            return "http://youtube.com" + self.link

        print("Text Content")
        self.wiki = wikipediaapi.Wikipedia('en')
        page = self.wiki.page(term)
        if page.exists():
            self.summary = page.summary
        else:
            self.summary = "No info found !"
        return self.summary
Пример #3
0
def get_means(next_five):
    """Query Google Trends for ``next_five`` and append a mean to ``means``.

    Args:
        next_five: Keyword list handed to both pytrends calls.

    Returns:
        int: Always ``0``.
    """
    pytrends = TrendReq(hl='en-US', tz=360)

    pytrends.build_payload(next_five,
                           cat=0, timeframe='now 1-H', geo='US', gprop='')

    # NOTE(review): the DataFrame returned here is never read afterwards.
    pytrends.get_historical_interest(
        next_five,
        year_start=2018, month_start=10, day_start=1, hour_start=0,
        year_end=2018, month_end=11, day_end=1, hour_end=0,
        cat=0, geo='US', gprop='', sleep=0)

    # NOTE(review): ``y`` and ``means`` are not defined in this function, so
    # they must exist at module level -- confirm this is intended and that
    # ``y`` was not meant to be the frame fetched above.
    means.append(y["Chicago"].mean())
    return 0
def trendhelper():
    """Prompt for keywords, download their interest history, print averages.

    Keywords are read from standard input until ``!`` is entered. The
    resulting DataFrame is written to ``d.csv``; each keyword's column is then
    read back from that file and its mean is printed.
    """
    pt = TrendReq()

    list_of_words = []

    print('When you have entered all your words, please enter : ! ')

    # Collect keywords; the terminating '!' itself is never stored.
    entry = input('Keywords : ')
    while entry != '!':
        list_of_words.append(entry)
        entry = input('')

    print('Your words list is ready , Please wait ... ')

    # Fetch the search history as a DataFrame and persist it.
    history = pt.get_historical_interest(keywords=list_of_words)
    history.to_csv('d.csv')

    # Report the mean of every keyword column, read back from the CSV.
    for word in list_of_words:
        column_values = pd.read_csv('d.csv', header=0, usecols=[word]).values
        print('Average search for %s  is %f' % (word, column_values.mean()))
Пример #5
0
def main():
    """Download hourly "bitcoin" interest month by month, one CSV per month.

    Each request covers the 1st of a month (01:00) up to the 1st of the
    following month (00:00); the result is printed and saved as
    ``"<keyword> <year>-<month>.csv"``.
    """
    start_year = 2019
    start_month = 6
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = ["bitcoin"]
    pytrends.build_payload(kw_list,
                           cat=0, timeframe='today 5-y', geo='', gprop='')

    for year in range(start_year, 2020):
        # NOTE(review): the range stops at 11, so no window starting in
        # December is requested -- confirm that is intended.
        for month in range(start_month, 12):
            data = pytrends.get_historical_interest(
                kw_list,
                year_start=year, month_start=month, day_start=1, hour_start=1,
                year_end=year, month_end=month + 1, day_end=1, hour_end=0,
                cat=0, geo='', gprop='', sleep=60)
            print(data)
            data.to_csv('{} {}-{}.csv'.format(kw_list[0], year, month))
Пример #6
0
    def get_training_data(self):
        """Return Google Trends training data, downloading it on first use.

        If ``./data/financial_data/google_trends.csv`` exists it is loaded and
        returned. Otherwise hourly interest for four crypto-related keywords
        is requested from 2015-01-01 00:00 up to the current hour, written to
        that path and returned.

        Returns:
            pandas.DataFrame: The cached or freshly downloaded data.
        """
        csv_path = './data/financial_data/google_trends.csv'

        # Reuse the earlier download when it is already on disk.
        if os.path.isfile(csv_path):
            return pd.read_csv(csv_path)

        pytrends = TrendReq()

        # Take one timestamp so the four end fields describe the same moment,
        # and pass them as integers like the start fields (strftime would
        # hand strings to the library).
        now = datetime.now()
        searches = pytrends.get_historical_interest(
            keywords=[
                'Cryptocurrency', 'Blockchain', 'Bitcoin', 'Ethereum'
            ],
            year_start=2015,
            month_start=1,
            day_start=1,
            hour_start=0,
            year_end=now.year,
            month_end=now.month,
            day_end=now.day,
            hour_end=now.hour,
        )

        # Build a DataFrame from the result and cache it as CSV.
        searches = pd.DataFrame(searches)
        # Ensure the target directory exists so the download is not lost to a
        # missing-folder error at write time.
        os.makedirs(os.path.dirname(csv_path), exist_ok=True)
        # NOTE(review): index=False omits the frame's index from the file, so
        # the cached copy has no index column -- confirm this is intended.
        searches.to_csv(csv_path, index=False)
        return searches
Пример #7
0
def hourly_test(keywords):
    """Fetch hourly YouTube interest for ``keywords`` in each of ``REGIONS``.

    Every region is printed, queried for 2018-01-01 to 2018-02-01, and the
    result is written to ``hourly_test.csv``.
    """
    getter = TrendReq(backoff_factor=0.2)
    for region in REGIONS:
        print(region)
        region_frame = getter.get_historical_interest(
            keywords,
            year_start=2018,
            month_start=1,
            day_start=1,
            hour_start=0,
            year_end=2018,
            month_end=2,
            day_end=1,
            hour_end=0,
            cat=0,
            geo=region,
            gprop='youtube',
            sleep=60,
        )
        # NOTE(review): the same path is written on every iteration, so only
        # the last region's data remains on disk -- confirm this is intended.
        region_frame.to_csv('hourly_test.csv')
Пример #8
0
def test_tz_param():
    """Export the same hourly "Bitcoin" query under two ``tz`` settings.

    The query (2018-01-01 to 2018-02-15) is run with ``tz=0`` and saved to
    ``test.xlsx``, then with ``tz=360`` and saved to ``test2.xlsx``, so the
    two workbooks can be compared.
    """
    print('testing tz...')

    # Everything except the client's tz is identical between the two runs.
    query = dict(year_start=2018,
                 month_start=1,
                 day_start=1,
                 hour_start=0,
                 year_end=2018,
                 month_end=2,
                 day_end=15,
                 hour_end=0,
                 cat=0,
                 geo='',
                 gprop='',
                 sleep=0)

    for tz_offset, workbook in ((0, "test.xlsx"), (360, "test2.xlsx")):
        pytrends = TrendReq(hl='en-US', tz=tz_offset, geo='')
        interest_over_time_df = pytrends.get_historical_interest(['Bitcoin'],
                                                                 **query)
        # ExcelWriter.save() is gone in pandas 2.x; the context manager
        # writes and closes the workbook on every pandas version, including
        # when to_excel raises.
        with pd.ExcelWriter(workbook, engine='xlsxwriter') as writer:
            interest_over_time_df.to_excel(writer)


#test_tz_param()
 def google_values(self):
     """Save hourly interest for "dock coin" (2018-10-01 to 2018-10-20) as CSV.

     The file is written to ``../dataset_files/google_trends/`` and named
     after the keyword.
     """
     from pytrends.request import TrendReq

     kw_list = ["dock coin"]
     client = TrendReq(hl='en-US', tz=360)
     window = dict(year_start=2018, month_start=10, day_start=1,
                   year_end=2018, month_end=10, day_end=20)
     interest = client.get_historical_interest(kw_list, sleep=0, **window)
     interest.to_csv(f'../dataset_files/google_trends/{kw_list[0]}.csv')
Пример #10
0
def get_trends(base_date, end_date):
    """Download hourly Google Trends values and store them in the database.

    The keyword list is ``'crypto'`` plus every entry of ``all_tickers`` with
    its first five characters removed. Keywords are requested three at a
    time; every (date, keyword, value) triple is inserted as a
    ``GoogleTrends`` row.

    Args:
        base_date: Start of the window as a POSIX timestamp.
        end_date: End of the window as a POSIX timestamp.

    Returns:
        str: ``'Failed'`` as soon as a request yields an empty frame,
        otherwise ``'Success'``.
    """
    kw_list = ['crypto'] + [coin[5:] for coin in all_tickers]

    pytrends = TrendReq(hl='en-US', tz=360)

    window_start = datetime.fromtimestamp(base_date)
    window_end = datetime.fromtimestamp(end_date)

    # Step through the keywords in groups of three. Iterating over the
    # keyword list itself never produces an empty trailing group; the former
    # `i <= len(all_tickers) + 1` bound did whenever the keyword count was a
    # multiple of three, turning a fully successful run into 'Failed'.
    for offset in range(0, len(kw_list), 3):
        trends = pytrends.get_historical_interest(
            kw_list[offset:offset + 3],
            year_start=window_start.year,
            month_start=window_start.month,
            day_start=window_start.day,
            hour_start=window_start.hour,
            year_end=window_end.year,
            month_end=window_end.month,
            day_end=window_end.day,
            hour_end=window_end.hour,
            cat=0,
            geo='',
            gprop='',
            sleep=60)

        if trends.empty:
            return 'Failed'

        # Keep one row per timestamp (the last one seen) and expose the date
        # as a regular column.
        trends = (trends.drop(['isPartial'], axis=1)
                  .reset_index()
                  .drop_duplicates(subset=['date'], keep="last"))

        value_columns = [col for col in trends.columns if col != 'date']
        for _, row in trends.iterrows():
            for col in value_columns:
                try:
                    google_entity = GoogleTrends(row['date'].timestamp(), col,
                                                 row[col])
                    db.session.add(google_entity)
                    db.session.commit()
                except Exception:
                    # Best effort: a failed insert is rolled back and reported
                    # so the remaining values are still processed.
                    db.session.rollback()
                    print('Already has value ' + col + ' ' + str(row['date']))

    return 'Success'
Пример #11
0
def main():
    """Incrementally download hourly Google Trends data into ``data/test.csv``.

    If the CSV already exists, downloading resumes one hour after its latest
    timestamp (but not before 2011-01-01); otherwise it starts at 2011-01-01.
    The span up to now is split at the dates produced by
    ``pd.date_range(..., freq="1m")``; after each non-empty download the
    accumulated frame is written back to the CSV. Finally the accumulated
    data is plotted.
    """
    filepath = "data/test.csv"

    trending_topics = ["economy", "energy", "bonds", "crisis", "finance"]

    start_date = pd.Timestamp("2011-01-01")
    end_date = pd.Timestamp.now()

    if os.path.isfile(filepath):
        df = pd.read_csv(filepath, index_col=0)
        df.index = pd.to_datetime(df.index)
        start_date = max(max(df.index), start_date)
        start_date = start_date + pd.Timedelta("1h")
    else:
        df = pd.DataFrame([])

    print("starting with {0} to {1}".format(start_date, end_date))

    ptr = TrendReq(hl='en-US', tz=1)

    months = list(pd.date_range(start_date, end_date, freq="1m"))

    # Consecutive boundary pairs form the request windows.
    for window_start, window_end in zip(months[:-1], months[1:]):
        print(window_start, window_end)
        df_new = ptr.get_historical_interest(trending_topics,
                                             year_start=window_start.year,
                                             month_start=window_start.month,
                                             day_start=window_start.day,
                                             hour_start=window_start.hour,
                                             year_end=window_end.year,
                                             month_end=window_end.month,
                                             day_end=window_end.day,
                                             hour_end=window_end.hour,
                                             cat=0,
                                             geo='',
                                             gprop='',
                                             sleep=60)

        # Skip windows without data. Checking the frame directly avoids
        # dropping 'isPartial' from a frame that may not have that column.
        if df_new.empty:
            continue

        # Persist after every window so an interrupted run can resume.
        df = pd.concat([df, df_new])
        df.to_csv(filepath)

    # The original referenced an undefined name `df2` here; plot the
    # accumulated frame instead, and only when there is something to draw.
    if not df.empty:
        df.plot()
Пример #12
0
def get_most_popular(keyword_list):
    """Return hourly interest for ``keyword_list`` and its top column label.

    Returns:
        tuple: ``(df, label)`` where ``df`` is the frame for 2018-01-01 to
        2018-02-01 and ``label`` is the index label at which the per-column
        maxima are largest.
    """
    window = dict(year_start=2018, month_start=1, day_start=1, hour_start=0,
                  year_end=2018, month_end=2, day_end=1, hour_end=0)
    client = TrendReq()
    df = client.get_historical_interest(keyword_list,
                                        cat=0, geo='', gprop='', sleep=0,
                                        **window)
    column_peaks = df.max()
    return df, column_peaks.idxmax()
Пример #13
0
def google_trend(kw_list=["ETH", "Ethereum"],
                 year_start=2018,
                 month_start=9,
                 day_start=1,
                 hour_start=0,
                 year_end=2019,
                 month_end=8,
                 day_end=13,
                 hour_end=0,
                 cat=0,
                 geo="",
                 gprop="",
                 sleep=60,
                 save=True):
    """Download hourly Google Trends data and add a ``time`` column.

    Args:
        kw_list: Keywords to query. The default list is only read here,
            never modified.
        year_start, month_start, day_start, hour_start: Start of the window.
        year_end, month_end, day_end, hour_end: End of the window.
        cat, geo, gprop, sleep: Forwarded unchanged to
            ``TrendReq.get_historical_interest``.
        save: When true, write the result to ``tmp/trends_<first keyword>.csv``.

    Returns:
        pandas.DataFrame: The data with a fresh integer index and a ``time``
        column holding each original index entry's ``timestamp()`` value.
    """
    pytrends = TrendReq()

    print(
        "Sending requests to Google Trends. It may take some time as requests are beeing splitted for each week, please be patient..."
    )

    trends = pytrends.get_historical_interest(kw_list,
                                              year_start=year_start,
                                              month_start=month_start,
                                              day_start=day_start,
                                              hour_start=hour_start,
                                              year_end=year_end,
                                              month_end=month_end,
                                              day_end=day_end,
                                              hour_end=hour_end,
                                              cat=cat,
                                              geo=geo,
                                              gprop=gprop,
                                              sleep=sleep)

    # Call timestamp(): without the parentheses the column was filled with
    # bound-method objects instead of numbers.
    epoch_seconds = [t.timestamp() for t in trends.index]
    trends = trends.reset_index(drop=True)
    trends["time"] = epoch_seconds

    if save:
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs("tmp/", exist_ok=True)

        trends.to_csv("tmp/trends_{}.csv".format(kw_list[0]), index=False)

    return trends
Пример #14
0
def get_historical_data():
    """Fetch hourly interest for ``KEYWORDS_LIST`` over roughly the last month.

    The window starts in the month that contains the date 30 days ago and
    ends on the last day of the current month.

    Returns:
        The DataFrame returned by ``TrendReq.get_historical_interest``.
    """
    # Local import keeps this block self-contained.
    import calendar

    current_date = datetime.datetime.now().date()
    window_start = current_date - datetime.timedelta(days=30)

    # Use the real last day of the current month; a fixed 31 is not a valid
    # date in shorter months.
    last_day = calendar.monthrange(current_date.year, current_date.month)[1]

    pytrends = TrendReq(hl='en-US',
                        tz=360,
                        timeout=(10, 25),
                        retries=2,
                        backoff_factor=0.1)
    # Start year and month both come from `window_start`, so a window opening
    # in December of the previous year is expressed correctly in January.
    # day_start is not passed, so the library default applies.
    data = pytrends.get_historical_interest(keywords=KEYWORDS_LIST,
                                            cat=ALCOHOL_CATEGORY,
                                            geo=GEO,
                                            year_start=window_start.year,
                                            month_start=window_start.month,
                                            year_end=current_date.year,
                                            month_end=current_date.month,
                                            day_end=last_day)
    return data
Пример #15
0
def getTrendsDataRaw(keyword, startDate, endDate):
    """Return the hourly interest column for ``keyword`` between two moments.

    Args:
        keyword: Single search term.
        startDate: Object providing ``year``, ``month``, ``day`` and ``hour``
            for the start of the window.
        endDate: Same, for the end of the window.

    Returns:
        The ``keyword`` column of the frame returned by pytrends.
    """
    # tz is the timezone offset from UTC in minutes.
    client = TrendReq(hl='en-US', tz=0)
    frame = client.get_historical_interest(
        [keyword],
        year_start=startDate.year,
        month_start=startDate.month,
        day_start=startDate.day,
        hour_start=startDate.hour,
        year_end=endDate.year,
        month_end=endDate.month,
        day_end=endDate.day,
        hour_end=endDate.hour,
        cat=0,
        geo='',
        gprop='',
        sleep=0,
    )
    return frame[keyword]
Пример #16
0
def trends(topic):
    """Return the mean hourly Google Trends interest for ``topic`` last week.

    Args:
        topic: Single search term.

    Returns:
        float: Mean of the most recent (up to) 168 hourly values in the
        seven-day window ending now; ``0.0`` if the series has no rows.
    """
    now = datetime.datetime.now()
    # Subtract a timedelta rather than 7 from the day number, which produced
    # an invalid day during the first week of a month.
    week_ago = now - datetime.timedelta(days=7)

    pytrends = TrendReq(hl='ru-RU', tz=360)
    interest = pytrends.get_historical_interest(
        [topic],
        year_start=week_ago.year,
        month_start=week_ago.month,
        day_start=week_ago.day,
        hour_start=week_ago.hour,
        year_end=now.year,
        month_end=now.month,
        day_end=now.day,
        hour_end=now.hour,
        cat=0,
        geo='',
        gprop='',
        sleep=0)[topic]

    # Average the trailing week positionally. The previous index loop read
    # element [-0] (the first value) plus the last 166 values and divided
    # by 168, and raised when fewer rows were returned.
    recent = interest.iloc[-168:]
    if recent.empty:
        return 0.0
    return float(recent.mean())
Пример #17
0
def get_google_trend_v2():
    """Export hourly "Bitcoin" interest (2015-01-01 to 2018-05-15) to Excel.

    The result is written to ``CryptoGoogleTrends_with_overlap.xlsx`` using
    the xlsxwriter engine.
    """
    pytrends = TrendReq(hl='en-US', tz=0, geo='')
    interest_over_time_df = pytrends.get_historical_interest(['Bitcoin'],
                                                             year_start=2015,
                                                             month_start=1,
                                                             day_start=1,
                                                             hour_start=0,
                                                             year_end=2018,
                                                             month_end=5,
                                                             day_end=15,
                                                             hour_end=0,
                                                             cat=0,
                                                             geo='',
                                                             gprop='',
                                                             sleep=0)
    # ExcelWriter.save() is gone in pandas 2.x; the context manager writes
    # and closes the workbook on every pandas version, including when
    # to_excel raises.
    with pd.ExcelWriter("CryptoGoogleTrends_with_overlap.xlsx",
                        engine='xlsxwriter') as writer:
        interest_over_time_df.to_excel(writer)
Пример #18
0
def getWeekTrend(word):
    """Return the summed hourly interest for ``word`` over the past week.

    The window runs from 00:00 seven days ago to 00:00 tomorrow.
    """
    today = date.today()
    one_day = datetime.timedelta(days=1)
    tomorrow = today + one_day
    week_ago = today - 7 * one_day

    client = TrendReq(hl='en-US', tz=360)
    week_frame = client.get_historical_interest(
        [word],
        year_start=week_ago.year,
        month_start=week_ago.month,
        day_start=week_ago.day,
        hour_start=0,
        year_end=tomorrow.year,
        month_end=tomorrow.month,
        day_end=tomorrow.day,
        hour_end=0,
        sleep=120,
    )
    return week_frame[word].sum()
Пример #19
0
    def pytrends_pull(self, query: list, query_loc: str, start_yr: int,
                      start_mo: int, end_yr: int, end_mo: int, type: str):
        """Return Google Trends interest for ``query`` at a chosen granularity.

        Args:
            query: Keyword list passed to pytrends.
            query_loc: Key into ``self.us_states`` (lower-cased before
                lookup); its ``'abbr'`` entry is used as the geo code.
            start_yr, start_mo: First month of the window (from day 1).
            end_yr, end_mo: Last month of the window (to its last day).
            type: ``'hour'`` uses ``get_historical_interest``; ``'day'``
                builds a payload for the explicit date range; ``'week'``
                builds a payload with the library's default timeframe.

        Returns:
            The DataFrame produced by the selected pytrends call.

        Raises:
            ValueError: If ``type`` is not ``'hour'``, ``'day'`` or ``'week'``.
            KeyError: If ``query_loc`` is not a key of ``self.us_states``.
        """
        # Reject unknown granularities before doing any work; previously they
        # fell through every branch and failed with UnboundLocalError.
        if type not in ('hour', 'day', 'week'):
            raise ValueError(
                "type must be 'hour', 'day' or 'week', got {!r}".format(type))

        geocode = self.us_states[query_loc.lower()]['abbr']

        # Create the connection to Google Trends.
        pytrend = TrendReq(timeout=(10, 25))

        last_day = calendar.monthrange(end_yr, end_mo)[1]
        startdate = datetime.datetime(year=start_yr, month=start_mo, day=1)
        enddate = datetime.datetime(year=end_yr,
                                    month=end_mo,
                                    day=last_day,
                                    hour=23)

        if type == 'hour':
            return pytrend.get_historical_interest(
                keywords=query,
                cat=0,
                geo=geocode,
                year_start=start_yr,
                month_start=start_mo,
                day_start=1,
                hour_start=0,
                year_end=end_yr,
                month_end=end_mo,
                day_end=last_day,
                hour_end=0)

        # NOTE(review): the 'day' and 'week' payloads do not pass `geocode`
        # -- confirm whether they should be restricted to the state as well.
        if type == 'day':
            timeframe = '{} {}'.format(startdate.strftime("%Y-%m-%d"),
                                       enddate.strftime("%Y-%m-%d"))
            pytrend.build_payload(kw_list=query, timeframe=timeframe)
        else:  # 'week'
            pytrend.build_payload(kw_list=query)

        return pytrend.interest_over_time()
Пример #20
0
def start(search_words, start_date):
    """Download hourly interest for ``search_words`` from ``start_date`` to today.

    Args:
        search_words: Single search term (wrapped in a one-element list).
        start_date: String whose characters 0-3, 5-6 and 8-9 hold the year,
            month and day.

    The result is saved to ``google_results.csv`` and its first ten rows are
    printed.
    """
    print("... google module started")
    now = datetime.datetime.now()

    # Search parameters: keyword list and start of the window.
    kw_list = [search_words]
    year_start, month_start, day_start = (
        int(start_date[lo:hi]) for lo, hi in ((0, 4), (5, 7), (8, 10)))
    hour_start = 0
    print(kw_list, " ", year_start, " ", month_start, " ", day_start)

    # End of the window: today at hour 0.
    year_end, month_end, day_end, hour_end = now.year, now.month, now.day, 0
    print(year_end, " ", month_end, " ", day_end)

    pytrend = TrendReq()
    pytrend.build_payload(kw_list)

    window = (year_start, month_start, day_start, hour_start,
              year_end, month_end, day_end, hour_end)
    search_results = pytrend.get_historical_interest(kw_list,
                                                     *window,
                                                     cat=0,
                                                     geo='',
                                                     gprop='',
                                                     sleep=0)

    # Persist the result.
    search_results.to_csv('google_results.csv')

    # Show the first 10 data points.
    print(search_results.head(10))
Пример #21
0
class PyTrendApiServiceWorker(object):
    """Fetch Google Trends data for a keyword list and record the output file.

    ``trend_db`` is a TinyDB handle created once, when the class body runs.
    """

    # Class-level default; SetKwArray replaces it per instance.
    kw_list = []

    # Predefined keyword groups.
    kw_list_dictionary = {
        'bodystyles': ['coupe', 'pickup', 'sedan', 'suv', 'crossover'],
        'makes': ["honda", "chevy", "ford", "subaru"],
        'models':
        ['honda civic', 'ford f-150', 'ford fusion', 'toyota sienna']
    }

    trend_db = TinyDB("./db/tinytrenddb.json")

    def __init__(self):
        """Create the pytrends client used by this worker."""
        self.pytrends = TrendReq(hl='en-US', tz=360)

    def GetTheKeyWords(self):
        """Placeholder; returns ``None``."""
        return None

    def SetKwArray(self, kwList):
        """Store ``kwList`` as this instance's keyword list."""
        self.kw_list = kwList

    def GetKwTrendData(self):
        """Download hourly interest for ``self.kw_list`` and log the file name.

        The data (2018-01-01 to 2019-09-01) is written to
        ``bodystyle_trends.txt`` and a record naming that file is inserted
        into ``trend_db``.
        """
        print(self.kw_list)
        window = dict(year_start=2018, month_start=1, day_start=1,
                      hour_start=0,
                      year_end=2019, month_end=9, day_end=1, hour_end=0)
        trend_frame = self.pytrends.get_historical_interest(
            self.kw_list, cat=0, geo='', gprop='', sleep=0, **window)
        trend_frame.to_csv("bodystyle_trends.txt")
        self.trend_db.insert({'body_styles_file_name': "bodystyle_trends.txt"})
Пример #22
0
def get_trends_dates(start_date, end_date, currencies_list):
    """Return daily-averaged Google Trends values per currency.

    For every currency the hourly interest between the two dates is fetched,
    averaged per calendar day and placed under a two-level column
    ``(currency, "Trends")``. Finally every column is passed through
    ``mathutil.map(val, MIN_TRENDS_VAL, MAX_TRENDS_VAL, 0, 1)``
    (presumably a linear rescale to the range 0..1 -- confirm against
    ``mathutil``).

    Args:
        start_date (datetime): Start of the window (year, month, day, hour).
        end_date (datetime): End of the window (year, month, day, hour).
        currencies_list (list): Currencies to query; each item's
            ``value.name`` is used as the search term.

    Returns:
        pandas.DataFrame: One ``(currency, "Trends")`` column per currency,
        indexed by date.
    """
    # tz is the timezone offset from UTC in minutes.
    client = TrendReq(hl='en-US', tz=0)

    def rescale(val):
        return mathutil.map(val, MIN_TRENDS_VAL, MAX_TRENDS_VAL, 0, 1)

    trends_data_frame = pd.DataFrame()
    for currency in currencies_list:
        daily = client.get_historical_interest(
            [currency.value.name],
            year_start=start_date.year,
            month_start=start_date.month,
            day_start=start_date.day,
            hour_start=start_date.hour,
            year_end=end_date.year,
            month_end=end_date.month,
            day_end=end_date.day,
            hour_end=end_date.hour,
            cat=0,
            geo='',
            gprop='',
            sleep=0,
        )
        # Truncate the hourly timestamps to their day so that grouping on
        # the index averages all hours of a day.
        daily.index = daily.index.floor('d')
        daily = daily.drop('isPartial', axis=1)
        daily.columns = pd.MultiIndex.from_product([[currency], ["Trends"]])
        daily = daily.groupby("date").mean()
        # Each new currency is placed in front of the ones collected so far.
        trends_data_frame = pd.concat([daily, trends_data_frame], axis=1)

    return trends_data_frame.apply(rescale)
Пример #23
0
def get_data(keyword, trend_type):
    """Return Google Trends data for ``keyword`` in the requested form.

    Args:
        keyword: A single term for ``'get_historical_interest'``; an iterable
            of terms for ``'intereset_by_region'``.
        trend_type: Selects the query; any other value yields ``None``.

    Returns:
        dict | list | None: The keyword's column as a dict for historical
        interest, a list of per-term dicts for regional interest, else
        ``None``.
    """
    pytrend = TrendReq(hl='en-US', tz=360)

    if trend_type == 'get_historical_interest':
        pytrend.build_payload(kw_list=[keyword],
                              cat=0, timeframe='today 3-m', geo='', gprop='')
        history = pytrend.get_historical_interest([keyword])
        return history.to_dict()[keyword]

    if trend_type == 'intereset_by_region':
        all_data = []
        for term in keyword:
            pytrend.build_payload([term])
            by_region = pytrend.interest_by_region().to_dict()[term]
            print('########3', by_region)
            all_data.append(by_region)
        return all_data

    return None
Пример #24
0
    def pytrends_pull(self, query: str, query_loc: str, start_yr: int,
                      start_mo: int, end_yr: int, end_mo: int):
        """Export Google Trends data for ``query`` to three CSV files.

        Written to ``<this file's directory>/../exported_files``:
        ``trend_history_interest_<query>.csv`` (hourly interest for the given
        month range), ``trend_time_interest_<query>.csv`` (interest over
        time) and ``trend_related_queries_<query>.csv`` (related queries).
        Spaces in ``query`` become underscores in the file names.

        Args:
            query: Single search term.
            query_loc: Currently unused; the geo code is fixed to ``""``.
            start_yr, start_mo: First month of the hourly window (from day 1).
            end_yr, end_mo: Last month of the hourly window (to its last day).
        """
        # Local import keeps this block self-contained.
        import calendar

        # NOTE(review): a per-state geo code derived from `query_loc` was
        # disabled here; all requests are made without a geo restriction.
        geocode = ""

        # Create the connection to Google Trends.
        pytrend = TrendReq()

        # Hourly interest for the requested months. The end day is the real
        # last day of the month: a fixed 30 is invalid for February and
        # drops the 31st of longer months.
        historical_interest = pytrend.get_historical_interest(
            keywords=[query],
            cat=0,
            geo=geocode,
            year_start=start_yr,
            month_start=start_mo,
            day_start=1,
            hour_start=0,
            year_end=end_yr,
            month_end=end_mo,
            day_end=calendar.monthrange(end_yr, end_mo)[1],
            hour_end=0)

        # Payload for the interest-over-time and related-queries lookups.
        pytrend.build_payload(kw_list=[query])

        time_interest = pytrend.interest_over_time()
        related_queries = pytrend.related_queries()

        no_spaces = query.replace(" ", "_")
        export_dir = os.path.join(os.path.dirname(__file__), "..",
                                  "exported_files")

        def export_path(kind):
            # normpath yields the platform's own separators; forcing
            # backslashes produced unusable paths outside Windows.
            return os.path.normpath(
                os.path.join(export_dir,
                             "trend_{}_{}.csv".format(kind, no_spaces)))

        # Export historical interest and time interest.
        historical_interest.to_csv(export_path("history_interest"))
        time_interest.to_csv(export_path("time_interest"))

        # Export related queries: one row per keyword entry.
        try:
            # newline='' is what the csv module expects for files it writes;
            # without it blank lines appear between rows on Windows.
            with open(export_path("related_queries"), 'w',
                      newline='') as csvf:
                writer = csv.DictWriter(
                    csvf, fieldnames=related_queries[query].keys())
                writer.writeheader()
                for data in related_queries.values():
                    writer.writerow(data)

        except IOError:
            print("Error writing related queries file.")
# Since pytrends is returning a DataFrame object, we need pandas:
import pandas as pd
# Import of pytrends (needs to be pip installed first):
from pytrends.request import TrendReq

# Open a Google Trends session for the queries below.
pytrends = TrendReq(tz=360, hl='en-US')
kw_list = ['Bitcoin', 'BTC']

# Ask for the historical interest of both keywords together, with no
# category / geo / property filter.
# NOTE(review): the start and end of the window are the same instant
# (2020-01-01, hour 0) - confirm that this is the intended range.
search_df = pytrends.get_historical_interest(
    kw_list,
    cat=0,
    geo='',
    gprop='',
    year_start=2020, month_start=1, day_start=1, hour_start=0,
    year_end=2020, month_end=1, day_end=1, hour_end=0,
    sleep=60,
)

# Persist the returned frame as CSV.
search_df.to_csv("../data/Trends/BTC_trend_complete.csv")
Пример #26
0
class APICaller:
    def __init__(self):
        """Set up the tracked names, the Google Trends client and empty caches."""
        # Names that features are collected for.
        self.weather_cities = ["new york city", "boston", "los angeles"]
        self.company_names = ["apple", "microsoft", "amazon", "facebook"]
        self.symbols = [
            "AAPL", "MSFT", "AMZN", "FB", "NFLX", "MCD", "WEN", "SHAK", "TSLA"
        ]

        self.pytrends = TrendReq(hl="en-US", tz=360)

        # feature name -> latest value
        self.feature_dict = {}
        # symbol -> (price, percentChange, volume) as delivered by the scraper
        self.data = {}

        # Per-symbol history of past prices, newest first, capped at
        # prices_to_remember entries.
        self.prices_to_remember = 30
        self.past_prices = {ticker: [] for ticker in self.symbols}

    # findData will use the stockScraper class to pull data, unless there's an issue
    def findData(self, symbol):
        try:
            dictionary = stockScrape.stockScraper(symbol)
            self.data.update(dictionary)
            return 1
        except:
            return 0

    # Gets the current price of a company under a given symbol
    def getPrice(self, symbol):
        return float(self.data[symbol][0])

    # Gets the current percent change of a company under a given symbol for that day
    def getPercentChange(self, symbol):
        return float(self.data[symbol][1])

    # Gets the amount of shares sold for a company for that day
    def getVolume(self, symbol):
        return float(self.data[symbol][2])

    # we need to call this EVERY time that we are updating i.e. once a minute
    def update_values(self):
        # there is a chance that the http stuff craps out, so have an error message
        the_val = 1
        for symbol in self.symbols:
            the_val *= self.findData(symbol)

        if the_val == 0:
            # indicate that there was an error, and stop all of this
            return True

        # update temperature and humidity features (in F and %, respectively)
        for city in self.weather_cities:
            weather = Weather(unit=Unit.FAHRENHEIT)
            location = weather.lookup_by_location(city)
            self.feature_dict[city + " temperature"] = float(
                location.condition.temp)
            self.feature_dict[city + " humidity"] = float(
                location.atmosphere.humidity)

        # update google trending info for each word
        historical_info = self.pytrends.get_historical_interest(
            self.company_names,
            year_start=2018,
            month_start=12,
            day_start=1,
            hour_start=0,
            year_end=2018,
            month_end=12,
            day_end=25,
            hour_end=0,
            cat=0,
            geo='US',
            gprop='',
            sleep=0)
        for name in self.company_names:
            # don't worry about the 3 Google errors here... it's fine
            self.feature_dict[name + " trend"] = float(
                historical_info[name][-1])

        # update the number of minutes in the day
        date_string = str(datetime.now())
        time_string = (date_string.split())[1]
        time_list = time_string.split(':')
        self.feature_dict["minutes"] = int(time_list[0]) * 60 + int(
            time_list[1])

        # update features based on the symbols
        for symbol in self.symbols:
            price = self.getPrice(symbol)
            self.feature_dict[symbol + " price"] = price

            # put price at the beginning of past prices
            self.past_prices[symbol] = [price] + self.past_prices[symbol]
            # only remember at most fixed number
            if len(self.past_prices[symbol]) > self.prices_to_remember:
                self.past_prices[symbol].pop()

            self.feature_dict[symbol +
                              " percent"] = self.getPercentChange(symbol)
            self.feature_dict[symbol + " volume"] = self.getVolume(symbol)
        return False

    def get_dict(self):
        return copy.deepcopy(self.feature_dict)

    def print_features(self):
        the_dict = self.get_dict()
        for key in the_dict:
            print(key + " : " + str(the_dict[key]))

    # return features based on the keys given in
    def return_features(self, keys):
        the_dict = self.get_dict()
        return_list = []
        for key in keys:
            return_list.append(the_dict[key])
        return return_list

    def return_last_prices(self):
        return copy.deepcopy(self.past_prices)

    def take_in_array(self, arr, key_arr):
        # update that dictionary
        self.feature_dict = {}
        for i, key in enumerate(key_arr):
            self.feature_dict[key] = float(arr[i])

        for symbol in self.symbols:
            price = self.feature_dict[symbol + " price"]
            # put price at the beginning of past prices
            self.past_prices[symbol] = [price] + self.past_prices[symbol]
            # only remember at most fixed number
            if len(self.past_prices[symbol]) > self.prices_to_remember:
                self.past_prices[symbol].pop()
# Google Trends session used by every request below.
pt = TrendReq(tz=360, hl="en-US")

# Compare two keywords over the whole available timeframe.
pt.build_payload(kw_list=["Python", "Java"], timeframe="all")

# Interest over time for the payload above (bare name: notebook display).
iot = pt.interest_over_time()
iot

# Plot the interest-over-time frame.
iot.plot(figsize=(10, 6))

# Hourly historical interest for a single keyword in category 396.
data = pt.get_historical_interest(
    keywords=["data science"],
    cat=396,
    year_start=2022,
    month_start=1,
    day_start=1,
    hour_start=0,
    year_end=2022,
    month_end=2,
    day_end=10,
    hour_end=23,
)
data

# Switch the payload to one keyword for the regional breakdown.
kw = "python"
pt.build_payload(kw_list=[kw], timeframe="all")
# Interest by country, keeping low-volume regions and geo codes.
ibr = pt.interest_by_region("COUNTRY", inc_geo_code=True, inc_low_vol=True)

# Countries ordered from highest to lowest interest (notebook display).
ibr[kw].sort_values(ascending=False)

# Topics related to the keyword.
rt = pt.related_topics()
Пример #28
0
from pytrends.request import TrendReq

# Google Trends session.
pytrends = TrendReq(tz=360, hl='en-US')

kw_list = ["Trump"]

# Historical interest for the keyword between 2019-08-07 and 2019-08-08
# (hour 0 on both ends), with no category / geo / property filter.
test = pytrends.get_historical_interest(
    keywords=kw_list,
    cat=0,
    geo='',
    gprop='',
    year_start=2019, month_start=8, day_start=7, hour_start=0,
    year_end=2019, month_end=8, day_end=8, hour_end=0,
    sleep=0,
)

# Query interest over time on the same session and print that frame.
test2 = pytrends.interest_over_time()

print(test2)
Пример #29
0
import numpy as np
import datetime as dt
import time as time
import os
from pytrends.request import TrendReq
import ML_functions as mlfcn
import Signals_Testing as st
# Google Trends session.
pytrends = TrendReq(tz=300, hl='en-US')

kw_list = ['Bitcoin', 'ethereum', 'cryptocurrency']
GPROP = 'news'
#pytrends.build_payload(kw_list, cat=0, timeframe='today 5-y', geo='', gprop='')

# Historical interest for the three keywords, restricted to the GPROP
# property, from 2018-02-01 to 2018-11-01 (hour 0 on both ends).
results = pytrends.get_historical_interest(
    keywords=kw_list,
    cat=0,
    geo='',
    gprop=GPROP,
    year_start=2018, month_start=2, day_start=1, hour_start=0,
    year_end=2018, month_end=11, day_end=1, hour_end=0,
    sleep=0,
)

print(results)

# Hand the frame to the project's spreadsheet writer.
st.write_new(results, 'google_trends_btc.xlsx', 'sheet1')
Пример #30
0
class GoogleTrend(DataCollector):  # 구글 트렌드를 통해 정보를 가져오는 클래스
    def __init__(self,
                 keyword=None,
                 hl='ko',
                 tz='82',
                 timeframe='today 5-y',
                 cat=0,
                 geo='KR',
                 gprop=''):  # constructor default settings
        """Store the query settings, then create the client and its payload.

        Args:
            keyword: list of search terms; ``None`` (the default) means
                ``['youtube']``.
            hl: host language passed to ``TrendReq``.
            tz: time zone value passed to ``TrendReq``.
            timeframe: timeframe string passed to ``build_payload``.
            cat: category id passed to ``build_payload``.
            geo: region code passed to ``build_payload``.
            gprop: Google property passed to ``build_payload``.
        """
        self.hl = hl
        self.tz = tz
        # A fresh list per instance: a list literal as the default value would
        # be one shared object, so mutating one instance's keyword list would
        # leak into every other instance created with the default.
        self.keyword = ['youtube'] if keyword is None else keyword
        self.timeframe = timeframe
        self.cat = cat
        self.geo = geo
        self.gprop = gprop
        self.update_pytrend()
        self.update_payload()

    # Login to Google. Only need to run this once, the rest of requests will use the same session.
    def update_pytrend(self):
        """(Re)create ``self.pytrend`` from the stored ``hl`` and ``tz``."""
        language, time_zone = self.hl, self.tz
        self.pytrend = TrendReq(hl=language, tz=time_zone)

    # Create payload and capture API tokens. Only needed for interest_over_time(), interest_by_region() & related_queries()
    def update_payload(self):
        """Send the stored query settings to the client via ``build_payload``."""
        payload_args = {
            'kw_list': self.keyword,
            'cat': self.cat,
            'timeframe': self.timeframe,
            'geo': self.geo,
            'gprop': self.gprop,
        }
        self.pytrend.build_payload(**payload_args)

    def set_pytrend(self,
                    hl='None',
                    tz='None'):  # hl is the host language, tz is the time zone
        """Change the host language and/or time zone, then rebuild the client.

        The string ``'None'`` is the "leave unchanged" marker for both
        arguments. Examples from the original notes: hl 'ko' / 'en_US';
        tz 82 for Korea, 360 for the US.
        """
        for attr_name, new_value in (('hl', hl), ('tz', tz)):
            if new_value != 'None':
                setattr(self, attr_name, new_value)
        # A new client needs its payload rebuilt as well.
        self.update_pytrend()
        self.update_payload()

    def set_payload(self,
                    keyword=None,
                    timeframe='None',
                    cat=-1,
                    geo='None',
                    gprop='None'):  # keyword list, timeframe, category, region, Google property
        """Update any subset of the query settings and rebuild the payload.

        Each argument has a "leave unchanged" marker: ``None`` for ``keyword``,
        ``-1`` for ``cat`` and the string ``'None'`` for the others.

        Args:
            keyword: new list of search terms.
            timeframe: e.g. 'all', 'today 5-y', 'today 1,2,3-m', 'now 1,7-d',
                'now 1,4-H', '2018-05-20 2019-01-20'.
            cat: new category id.
            geo: e.g. 'KR', 'US', ''.
            gprop: e.g. 'images', 'news', 'youtube', 'froogle'.
        """
        # Identity test: `!= None` would invoke the argument's own __ne__,
        # which array-like values answer element-wise instead of with a bool.
        if keyword is not None:
            self.keyword = keyword
        if timeframe != 'None':
            self.timeframe = timeframe
        if cat != -1:
            self.cat = cat
        if geo != 'None':
            self.geo = geo
        if gprop != 'None':
            self.gprop = gprop
        self.update_payload()

    def load_data(self, keyword=None):
        if keyword == 'region':
            self.interest_by_region()
            return self.interest_by_region_df_to_list()
        elif keyword == 'gender':
            return self.search_rate_by_gender()

    # Interest Over Time
    def interest_over_time(self):
        """Fetch interest over time and return it in list form.

        The frame is cached on ``interest_over_time_df`` and the converted
        list on ``interest_over_time_list``.
        """
        fetched = self.pytrend.interest_over_time()  # Returns pandas.DataFrame
        keyword_count = len(self.keyword)
        # Keep only the leading keyword columns; this removes the unused
        # isPartial data.
        self.interest_over_time_df = fetched.iloc[:, :keyword_count]
        self.interest_over_time_list = self.interest_over_time_df_to_list()
        return self.interest_over_time_list

    # Interest Over Time hourly
    def historical_hourly_interest(self):
        """Fetch hourly interest for the keywords and return it in list form.

        The request window (2019-04-01 to 2019-05-01, hour 0 on both ends),
        category 0, geo 'KR' and an empty gprop are fixed here rather than
        taken from the instance settings. The frame is cached on
        ``historical_hourly_interest_df`` and the converted list on
        ``historical_hourly_interest_list``.
        """
        request = {
            'keywords': self.keyword,
            'year_start': 2019,
            'month_start': 4,
            'day_start': 1,
            'hour_start': 0,
            'year_end': 2019,
            'month_end': 5,
            'day_end': 1,
            'hour_end': 0,
            'cat': 0,
            'geo': 'KR',
            'gprop': '',
            'sleep': 0,
        }
        fetched = self.pytrend.get_historical_interest(
            **request)  # Returns pandas.DataFrame
        keyword_count = len(self.keyword)
        # Keep only the leading keyword columns; this removes the unused
        # isPartial data.
        self.historical_hourly_interest_df = fetched.iloc[:, :keyword_count]
        self.historical_hourly_interest_list = (
            self.historical_hourly_interest_df_to_list())
        return self.historical_hourly_interest_list

    # Interest by Region
    def interest_by_region(self):  # reports the search ratio per region
        """Fetch interest by region, cache it, and return its grouped list form."""
        region_frame = self.pytrend.interest_by_region()
        self.interest_by_region_df = region_frame
        grouped = self.interest_by_region_df_to_list()
        self.interest_by_region_list = grouped
        return grouped

    # Related Topics, Returns dictionary of pandas.DataFrames
    def related_topics(self):  # reports keyword-related topics by rank
        """Fetch related topics, cache them on ``related_topics_dict``, return them."""
        topics = self.pytrend.related_topics()
        self.related_topics_dict = topics
        return topics

    # Related Queries, returns a dictionary of dataframes
    def related_queries(self):  # reports keyword-related search queries by rank
        """Fetch related queries, cache them on ``related_queries_dict``, return them."""
        queries = self.pytrend.related_queries()
        self.related_queries_dict = queries
        return queries

    # trending searches in real time
    def trending_searches(self):  # original note: shows the current top-20 trending searches
        """Fetch trending searches for 'south_korea', cache and return them."""
        trending = self.pytrend.trending_searches(pn='south_korea')
        self.trending_searches_df = trending
        return trending

    #
    def today_searches(self):
        """Fetch today's searches with the client defaults, cache and return them."""
        todays = self.pytrend.today_searches()
        self.today_searches_df = todays
        return todays

    # Get Google Top Charts
    def top_charts(self):  # fetch the top hot keywords for a year
        """Fetch the top charts for the fixed year/locale below, cache and return them."""
        # date = YYYY integer, tz='82', geo='KR', geo='GLOBAL', geo='US'
        chart_args = dict(date=2015, hl='ko', tz='82', geo='KR')
        charts = self.pytrend.top_charts(**chart_args)
        self.top_charts_df = charts
        return charts

    # Get Google Category
    def categories(self):  # shows the Google category kinds and their ids
        """Fetch the category listing, cache it on ``categories_df``, return it."""
        listing = self.pytrend.categories()
        self.categories_df = listing
        return listing

    def show_interest_over_time(self):  # plots the search ratio over time
        """Plot the cached ``interest_over_time_df``, one line per keyword."""
        plt.figure(figsize=(14, 4))
        plt.style.use('ggplot')  # nicer-looking drawing style
        colour_pos = 0.0
        for term in self.keyword:
            # step along the rainbow colormap so each keyword gets its own colour
            colour_pos += 0.1
            series = self.interest_over_time_df[term]
            plt.plot(series, c=plt.cm.rainbow(colour_pos), label=term)
        plt.legend(bbox_to_anchor=(1, 1), loc=2)  # sets the legend position
        plt.show()

    def interest_over_time_df_to_list(
            self):  # interest_over_time_df의 데이터프레임 타입의 데이터를 리스트 타입으로 변환
        date = self.interest_over_time_df.index.tolist()
        for i in range(len(date)):
            date[i] = date[i].date().strftime("%Y-%m-%d")
        date.insert(0, 'x')
        data = []
        data.append(date)
        for key in self.keyword:
            y = self.interest_over_time_df[key].tolist()
            y.insert(0, key)
            data.append(y)
        return data

    def historical_hourly_interest_df_to_list(
            self
    ):  # historical_hourly_interest_df의 데이터프레임 타입의 데이터를 리스트 타입으로 변환
        date = self.historical_hourly_interest_df.index.tolist()
        for i in range(len(date)):
            date[i] = date[i].date().strftime("%Y-%m-%d")
        date.insert(0, 'x')
        data = []
        data.append(date)
        for key in self.keyword:
            y = self.historical_hourly_interest_df[key].tolist()
            y.insert(0, key)
            data.append(y)
        return data

    def interest_by_region_df_to_list(
            self):  # interest_by_region_df의 데이터프레임 타입의 데이터를 리스트 타입으로 변환
        region = self.interest_by_region_df.index.tolist()
        data = []
        for key in self.keyword:
            y = self.interest_by_region_df[key].tolist()
        ratio = 0
        for i in [0, 1, 2, 3, 8, 11, 12, 13, 14, 15]:
            ratio += y[i]
        ratio /= 100
        tmp_val = 0
        reg_name = ''
        if ratio > 0:
            for i in range(len(region)):
                if i in [1, 2, 14, 11, 0, 13]:
                    if i == 0:
                        tmp_val = round(y[i] / ratio)
                        reg_name = '강원도'
                    elif i == 1:
                        tmp_val = round((y[i] + y[i + 1]) / ratio)
                        reg_name = '서울/경기'
                    elif i == 2:
                        tmp_val = round((y[i] + y[i + 1]) / ratio)
                        reg_name = '경상도'
                    elif i == 11:
                        tmp_val = round((y[i] + y[i + 1]) / ratio)
                        reg_name = '전라도'
                    elif i == 13:
                        tmp_val = round(y[i] / ratio)
                        reg_name = '제주도'
                    elif i == 14:
                        tmp_val = round((y[i] + y[i + 1]) / ratio)
                        reg_name = '충청도'
                    data.append([reg_name, tmp_val])
        return data

    def search_rate_by_gender(self):
        gender_data = []
        gender_data.append(['male', random.randint(50, 100)])
        gender_data.append(['female', random.randint(50, 100)])
        return gender_data