示例#1
0
 def getDemand(self):
     """Fetch daily Google Trends data for ``self.keyword``.

     The requested range runs from ``self.month_start``/``self.year_start``
     to ``self.month_end``/``self.year_end``. No geographic filter is applied
     (``geo=''``) and ``wait_time=10`` is passed through to
     ``dailydata.get_daily_data``.

     Returns:
         Whatever ``dailydata.get_daily_data`` returns for that range.
     """
     # No session object is created here: the call below does not accept one.
     return dailydata.get_daily_data(
         self.keyword,
         start_year=self.year_start,
         start_mon=self.month_start,
         stop_year=self.year_end,
         stop_mon=self.month_end,
         geo='',
         wait_time=10,
     )
示例#2
0
def get_google_trends(kw, start_unix, end_unix, file_name):
    """
    Append daily Google Trends search volume for a keyword to a CSV file.

    If the CSV already holds rows, the download resumes from the last stored
    date instead of ``start_unix`` (when ``start_unix`` is earlier), and
    dates that are already in the file are not written a second time.
    A missing, empty or header-only file is treated as "no history yet".

    Parameters
    ----------
    kw : str
        Search term passed to ``dailydata.get_daily_data``.
    start_unix : int
        Epoch (Unix) Time in Seconds.
    end_unix : int
        Epoch (Unix) Time in Seconds.
    file_name : str
        Path of the CSV file. Rows are appended with a ``Date`` index
        column; the column named after ``kw`` is stored as
        ``Search Volume``.

    Returns
    -------
    NoneType
        None.
    """
    stored_dates = set()
    last_start_unix = None
    # A missing or zero-byte file simply means there is nothing to resume.
    if os.path.exists(file_name) and os.stat(file_name).st_size > 0:
        stored_index = pd.read_csv(file_name, index_col='Date').index
        if len(stored_index) > 0:  # a header-only file has no last date
            stored_dates = set(stored_index.astype(str))
            last_start_unix = ts_to_unix(stored_index[-1])

    # Never request history that is older than the last stored date.
    if last_start_unix is not None and start_unix < last_start_unix:
        start_unix = last_start_unix

    start = unix_to_ts(start_unix)
    end = unix_to_ts(end_unix)

    search_volume = dailydata.get_daily_data(kw,
                                             start.year,
                                             start.month,
                                             end.year,
                                             end.month,
                                             geo='')
    # Discard the first four columns of the result; what remains is kept.
    search_volume = search_volume.drop(search_volume.columns[0:4], axis=1)
    search_volume['Date'] = search_volume.index
    search_volume['Date'] = search_volume['Date'].apply(date_to_str)
    search_volume = search_volume.set_index('Date')
    search_volume = search_volume.rename({kw: 'Search Volume'}, axis=1)

    # The request is only year/month granular, so a resumed download comes
    # back with days that are already in the file; skip those rows so the
    # CSV does not accumulate duplicate dates.
    if stored_dates:
        already_stored = search_volume.index.astype(str).isin(stored_dates)
        search_volume = search_volume[~already_stored]

    with open(file_name, 'a') as f:
        # Write the header only when the file is still empty.
        search_volume.to_csv(f, header=(f.tell() == 0))
    print(f'Finished! Updated from {start} to {end}')
示例#3
0
def update_csv_data(ticker):
    """Create or extend the stored daily-trends CSV for ``ticker``.

    The file path is taken from the name ``csv``, which is defined outside
    this function. If the file does not exist yet, the full history from
    January 2010 up to the current month is downloaded and written. After
    that, if the last date in the file is not today, the months from the
    last stored date onwards are downloaded again and only the rows after
    the last stored date are appended before the file is rewritten.

    Args:
        ticker: search term passed to ``get_daily_data``.

    Returns:
        None.
    """
    import io  # local import: only used for the in-memory CSV round trip

    start = datetime.date(2010, 1, 1)
    end = datetime.date.today()

    try:
        df = pd.read_csv(csv, index_col=0)
    except FileNotFoundError:
        # First run: download the whole history and persist it.
        get_daily_data(ticker, start.year, start.month, end.year, end.month,
                       geo="US", verbose=True).to_csv(csv)
        df = pd.read_csv(csv, index_col=0)

    last = datetime.datetime.strptime(df.index[-1], '%Y-%m-%d').date()
    if last == end:
        return  # already up to date, nothing to rewrite

    fresh = get_daily_data(ticker, last.year, last.month, end.year, end.month,
                           geo="US", verbose=True)
    # Round-trip through CSV text so the new frame gets the same
    # string-typed index as the frame that was read from disk; this is done
    # in memory rather than through a shared file in the working directory.
    buffer = io.StringIO()
    fresh.to_csv(buffer)
    buffer.seek(0)
    new_df = pd.read_csv(buffer, index_col=0)

    # Only rows strictly after the last stored date are new.
    new = (last + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    if new in new_df.index:
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        df = pd.concat([df, new_df.loc[new:]])
    df.to_csv(csv)
示例#4
0
    def add_trends(self):
        """Join daily Google Trends data for 'recession' onto the training data.

        Downloads the daily series for January 2004 through October 2019
        restricted to the United States, inner-joins it with
        ``self.train_data`` on the ``Date`` key, and appends ``'kurt'`` to
        ``self.the_list``.
        """
        # Google Trends expects the two-letter country code ('US').
        df = dailydata.get_daily_data('recession',
                                      2004,
                                      1,
                                      2019,
                                      10,
                                      geo='US')
        # NOTE(review): the join needs 'Date' to be a column or index level
        # in both frames -- confirm against what get_daily_data returns.
        # No suffixes are supplied, so overlapping non-key column names
        # raise instead of being renamed.
        self.train_data = pd.merge(left=self.train_data,
                                   right=df,
                                   how='inner',
                                   on='Date',
                                   suffixes=(None, None))
        # NOTE(review): 'kurt' is what the original code registers here;
        # confirm it is the intended label for the trends feature.
        self.the_list.append('kurt')
示例#5
0
def fetch():
    """Download daily UK search trends for every keyword into one CSV.

    Keywords are loaded from ``./data/raw/keywords_list.txt``. For each
    keyword the daily data for August 2004 through August 2020 (``geo='GB'``)
    is requested and its ``<keyword>_unscaled`` column is kept; keywords
    whose result is empty are skipped. The kept columns are combined into
    one frame with one column per keyword, which is written to
    ``./data/raw/search_trends.csv``.
    """
    keywords_list = load_keywords("./data/raw/keywords_list.txt")

    unscaled_by_keyword = {}
    for keyword in keywords_list:
        df = dailydata.get_daily_data(keyword, 2004, 8, 2020, 8, geo='GB')
        if not df.empty:
            unscaled_by_keyword[keyword] = df[f'{keyword}_unscaled']

    if unscaled_by_keyword:
        # concat lines the series up on their own index, so the result no
        # longer depends on the index of whichever frame was fetched last.
        data_by_days = pd.concat(unscaled_by_keyword, axis=1)
    else:
        # No keyword produced data: still write an (empty) result file.
        data_by_days = pd.DataFrame()
    data_by_days.to_csv('./data/raw/search_trends.csv')
    print('Data is fetched!')
def main():
    """Show and save daily trends data for the stocks picked in the sidebar.

    The sidebar multiselect offers ``MSCI_WORLD_SYMBOLS + DAX_SYMBOLS``. For
    each selected symbol the daily data between ``START_MONTH``/``START_YEAR``
    and ``END_MONTH``/``END_YEAR`` is fetched, written to the page, drawn as
    a line chart and saved to ``data/trends/<symbol>.csv``.
    """
    selectable = MSCI_WORLD_SYMBOLS + DAX_SYMBOLS
    chosen = st.sidebar.multiselect('Select stocks:', selectable)

    for symbol in chosen:
        # TODO: also search for "{stock} portfolio" or "{stock} stock"
        trend = get_daily_data(
            f"{symbol}",
            START_YEAR,
            START_MONTH,
            END_YEAR,
            END_MONTH,
            geo="",
            wait_time=1.0,
        )
        st.write(trend)
        st.line_chart(trend)
        trend.to_csv(f"data/trends/{symbol}.csv")
示例#7
0
def download_daily_google_trends(keyword, start_year, start_month, end_year,
                                 end_month):
    """
    Fetch daily Google search trends for 'keyword', plot them, and save
    them as a CSV named 'google_trends_{keyword}_{timestamp}.csv'.

    Two charts are shown one after the other: the '{keyword}_monthly'
    column and the rescaled '{keyword}' column.

    Args:
        keyword: (str) word to search for
        start_year: (int) returning trends starting from this year (and month)
        start_month: (int) returning trends starting from this (year and) month
        end_year: (int) returning trends ending at this year (and month)
        end_month: (int) returning trends ending at this (year and) month

    Returns:
        None

    Examples:
        download_daily_google_trends(keyword = 'ethereum', start_year=2015, start_month=7, end_year=2019, end_month=11)
    """
    # API doc and math explained: https://github.com/GeneralMills/pytrends/blob/master/pytrends/dailydata.py
    df_daily = dd.get_daily_data(keyword, start_year, start_month, end_year,
                                 end_month)
    print(df_daily.tail(31))

    # (column, chart title) pairs, drawn in this order: first the monthly
    # data obtained from Google, then the daily data rescaled with it.
    charts = (
        (f"{keyword}_monthly",
         f"Google trends (monthly data): {keyword}"),
        (f"{keyword}",
         f"Google trends(rescaled to make the daily data comparable): {keyword}"),
    )
    for column, title in charts:
        plt.plot(df_daily.index, df_daily[column])
        plt.autoscale(enable=True, axis='x', tight=True)
        plt.title(title)
        plt.grid(True)
        plt.show()

    # Save the frame; the current Unix time (whole seconds) goes in the name.
    timestamp = int(datetime.now().timestamp())
    df_daily.to_csv(f"google_trends_{keyword}_{timestamp}.csv")
示例#8
0
def get_daily_google_data(kw_list, from_year, from_month):
    """Return daily trends for several keywords, cached under ``googledata/``.

    The cache file is ``googledata/<kw_list[0]>.csv``. If it exists it is
    loaded and returned; otherwise each keyword in ``kw_list`` is downloaded
    from ``from_month``/``from_year`` up to the current month, combined into
    one frame (one column per keyword), saved to the cache file and returned.

    Args:
        kw_list: keywords to query; the first entry also names the cache file.
        from_year: first year of the requested range.
        from_month: first month of the requested range.

    Returns:
        DataFrame indexed by ``Date`` (converted with ``pd.to_datetime``).
    """
    # kw_list - [0] ticker, [1] full name, [2] name without corporation classification
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('googledata', exist_ok=True)
    cache_file = f'googledata/{kw_list[0]}.csv'

    if path.exists(cache_file):
        df = pd.read_csv(cache_file).set_index('Date')
        df.index = pd.to_datetime(df.index)
        return df

    # Take one clock reading so the end year and end month always belong to
    # the same instant (two readings could straddle a year boundary).
    now = dt.now()
    df = pd.DataFrame()
    for kw in kw_list:
        df[kw] = dailydata.get_daily_data(kw, from_year, from_month, now.year, now.month)[kw]
    df.index.name = "Date"
    df.index = pd.to_datetime(df.index)
    df.to_csv(cache_file)
    return df
示例#9
0
                    count += 1
                else:
                    CASH[ CCC ] = temp_cash
                    count += 1

        self.CASH = CASH

    def smotetomek( self ):
        """Resample ``self.X_train`` / ``self.y_train`` with ``SMOTETomek``.

        Both attributes are replaced by the resampled data returned by a
        default-constructed ``SMOTETomek`` instance.
        """
        smt = SMOTETomek()
        # Current imbalanced-learn releases expose `fit_resample`; the old
        # `fit_sample` name is used only when `fit_resample` is unavailable.
        resample = getattr( smt, 'fit_resample', None ) or smt.fit_sample
        self.X_train, self.y_train = resample( self.X_train, self.y_train )

pytrend = TrendReq()
help(pytrend)  # prints the interactive help text for the session object

# Daily data for 'recession', October 2019 through May 2020, United States.
# Google Trends expects the two-letter country code ('US').
df = dailydata.get_daily_data('recession', 2019, 10, 2020, 5, geo = 'US')

# Keyword-argument bundles handed to Data.execute_all below, in this order.
set_date_inputs = { 'start_date': "2000-01-01", 'end_date': "2020-06-19" }
st_ret_inputs = { 'change_days': [ 1, 3, 5, 14, 21 ] }
volatility_inputs = { 'windows' : [ 5, 15, 30, 60, 90, 180 ] }
lt_ret_inputs = { 'change_days' : [ 60, 90, 180, 250 ] }
ma_inputs = { 'sma_list' : [ 15, 30, 60, 90, 180 ], 'ema_list' : [ 90, 180 ] }
create_labels_inputs = { 'target_return_period' : 14, 'tail_probs' : [ 0.25, 0.70 ] }


data = Data()
data.execute_all( set_date_inputs, st_ret_inputs, volatility_inputs, lt_ret_inputs,
                  ma_inputs, create_labels_inputs, regime = True, high = True, low = False, kurt = False )

ext_spike_hp = {'criterion': 'entropy',
            'max_depth': 212,
示例#10
0
    # Both flags default to off unless set on the command line.
    parser.set_defaults(plot=False)
    parser.set_defaults(verbose=False)
    args = parser.parse_args()

    input_path = args.input
    # INFO-level log records go to a file in the working directory.
    logging.basicConfig(filename='./google_trends_crawler.log', level=logging.INFO)

    # Stop with exit status 1 when the input file is missing.
    if not os.path.exists(input_path):
        print('>>> Input file does not exist!')
        print('>>> Exit...')
        sys.exit(1)

    # == == == == == == Part 3: Start Google trends crawler == == == == == == #
    # Read queries from the input file: one JSON object per line with the
    # keys 'keyword', 'mid', 'start_date' and 'end_date'.
    with open(input_path, 'r') as input_data:
        for line in input_data:
            query_json = json.loads(line.rstrip())
            keyword = query_json['keyword']
            mid = query_json['mid']

            start_date_str = query_json['start_date']
            end_date_str = query_json['end_date']
            start_date_obj = datetime.strptime(start_date_str, '%Y-%m-%d')
            logging.info('>>> Query for topic {0}'.format(keyword))

            # Result dict; 'daily_search' starts empty and is not filled in
            # the lines visible here.
            google_trends = {'start_date': start_date_str, 'end_date': end_date_str, 'daily_search': []}

            # The query is made with 'mid' as the search word; the result is
            # saved under the keyword's name.
            # NOTE(review): the requested range is hard-coded to 2017-01 ..
            # 2018-04; the start/end dates parsed above are not used for it
            # -- confirm that this is intended.
            res_df = dailydata.get_daily_data(word=mid, start_year=2017, start_mon=1, stop_year=2018, stop_mon=4)
            res_df.to_csv('data/{0}.csv'.format(keyword))
示例#11
0
from pytrends.request import TrendReq
from pytrends import dailydata
import pandas as pd
# Each keyword is requested on its own because sending them together as one
# payload gave a 400 error.
_search_terms = (
    "Vaccine 5G",
    "plandemic",
    "anti mask",
    "Great Reset",
    "Bill Gates Vaccine",
)
ao1, ao2, ao3, ao4, ao5 = [
    dailydata.get_daily_data(term, 2020, 3, 2021, 5, geo="")
    for term in _search_terms
]
# Put the five result frames side by side and save them in a single CSV file.
# NOTE(review): index=False leaves the frames' index out of the file --
# confirm that dropping it is intended.
ao = pd.concat([ao1, ao2, ao3, ao4, ao5], axis=1)
ao.to_csv('jay.csv', sep=',', index=False)
示例#12
0
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as web
import datetime as dt
import os

# Environment variable set before the rest of the script runs; presumably it
# lowers TensorFlow's native log verbosity -- confirm (no TensorFlow import
# is visible in this excerpt).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
company = 'FB'  # symbol passed to the price reader below
key = 'facebook'  # search term passed to get_daily_data below

# Date range requested from the price reader.
start = dt.datetime(2012, 1, 1)
end = dt.datetime(2020, 1, 1)

# Price data for `company` from the 'yahoo' source over [start, end].
data = web.DataReader(company, 'yahoo', start, end)
# Daily trends for `key`, January to February 2019, geo='US'.
# NOTE(review): `dailydata` and `MinMaxScaler` are not imported in the lines
# visible here; `df` is not used again in this excerpt.
df = dailydata.get_daily_data(key, 2019, 1, 2019, 2, geo='US')

# One scaler object is fitted four times in a row, once per price column
# reshaped to a single-column 2-D array. Each column is therefore scaled on
# its own range, and after these lines the scaler holds the fit from the
# last call ('Open') -- assuming scikit-learn's MinMaxScaler; confirm.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data_c = scaler.fit_transform(data['Close'].values.reshape(-1, 1))
scaled_data_h = scaler.fit_transform(data['High'].values.reshape(-1, 1))
scaled_data_l = scaler.fit_transform(data['Low'].values.reshape(-1, 1))
scaled_data_o = scaler.fit_transform(data['Open'].values.reshape(-1, 1))

x_train = []
y_train = []

# Walk the rows from index 21 onwards and derive per-row values.
# NOTE(review): the loop body appears to be cut off in this excerpt; nothing
# visible here stores these values in x_train / y_train.
for x in range(21, len(scaled_data_c)):
    # Differences of separately scaled columns (see the scaler note above).
    h_l = float(scaled_data_h[x] - scaled_data_l[x])
    o_c = float(scaled_data_o[x] - scaled_data_c[x])
    # Means of the scaled close over the 7 and 14 rows before row x.
    seven_a = np.average(scaled_data_c[x - 7:x, 0])
    fourteen_a = np.average(scaled_data_c[x - 14:x, 0])
示例#13
0
def get_gsvi(w):
    """Return daily search data for ``w`` with a lagged median and its log gap.

    Downloads the daily series for January 2012 through March 2017 and adds
    two columns:

    * ``lag_media``: rolling median of column ``w`` over ``win`` rows,
      shifted down by one row (``win`` is defined outside this function).
    * ``asvi``: ``log(w) - log(lag_media)``.

    The index is converted to plain ``date`` objects before returning.

    Args:
        w: search term; also the name of the value column in the result.

    Returns:
        The downloaded frame with the two extra columns.
    """
    gsvi = get_daily_data(w, 2012, 1, 2017, 3)
    gsvi['lag_media'] = gsvi.loc[:, w].rolling(win).median().shift(1)
    gsvi['asvi'] = np.log(gsvi.loc[:, w]) - np.log(gsvi.lag_media)
    # `date` has to be called: without the parentheses every index entry
    # would be the bound method object instead of the calendar date.
    gsvi.index = gsvi.index.map(lambda x: x.date())
    return gsvi
示例#14
0
import pandas as pd
from pytrends.request import TrendReq
from pytrends import dailydata

# Session object; in the lines visible here only the commented-out
# experiments below refer to it.
pytrends = TrendReq(hl='en-US', tz=360)

# --- Disabled experiment: interest over time for two keywords, saved to CSV.
#keywords = ['Lana', 'Mercado Pago']
#pytrends.build_payload(
#     kw_list=keywords,
#     cat=0,
#     timeframe='today 3-m',
#     geo='TW',
#     gprop='')
#data = pytrends.interest_over_time()
#data.to_csv('Py_VS_R.csv', encoding='utf_8_sig')

# --- Disabled experiment: same export without the 'isPartial' column.
#data = data.drop(labels=['isPartial'],axis='columns')
#data.to_csv('Py_VS_R.csv', encoding='utf_8_sig')

# --- Disabled experiment: plot the result and save the figure.
#image = data.plot(title = 'Python V.S. R in last 3 months on Google Trends')
#fig = image.get_figure()
#fig.savefig('figure.png')

# --- Disabled experiment: trending searches.
#df = pytrends.trending_searches(pn='argentina')
#print(df)

# --- Disabled experiment: historical interest for 'Corona'.
#t = pytrends.get_historical_interest('Corona', year_start=2020, month_start=1, day_start=1, hour_start=0, year_end=2020, month_end=5, day_end=1, hour_end=0, cat=0, geo='', gprop='', sleep=20)
#print(t)

# Live code: daily data for 'cinema', January to October 2019, geo='BR',
# printed to stdout.
df = dailydata.get_daily_data('cinema', 2019, 1, 2019, 10, geo='BR')
print(df)