示例#1
0
def yahoo_finance_functions():
    """Exercise a handful of yahoo_fin stock_info endpoints for AAPL.

    Fetches AAPL's price history, the day's gainers / losers /
    most-active tables, AAPL analyst info, and finally AAPL's live
    price. Results other than the live price are discarded.
    """
    si.get_data("AAPL")
    # The three day-summary tables take no arguments; call them in turn.
    for fetch_table in (si.get_day_gainers,
                        si.get_day_losers,
                        si.get_day_most_active):
        fetch_table()
    si.get_analysts_info("AAPL")
    live_price = si.get_live_price("AAPL")
示例#2
0
文件: stock1.py 项目: msv69/stock1
def import_yahoofinance2(ticker="aapl"):
    """Demo of the yahoo_fin ``stock_info`` API.

    Fetches live prices for AAPL, AMZN and *ticker*, quote tables for
    AAPL (frame and dict forms), the top cryptocurrencies, and the
    day's most-active / gainer / loser tables. All results are
    discarded; this exists to demonstrate the calls.

    Args:
        ticker: extra ticker symbol to fetch a live price for. This was
            previously an undefined global and raised NameError.
    """
    # import stock_info module from yahoo_fin
    from yahoo_fin import stock_info as si

    # get live price of Apple
    si.get_live_price("aapl")

    # or Amazon
    si.get_live_price("amzn")

    # or any other ticker (now a parameter instead of an undefined name)
    si.get_live_price(ticker)

    # get quote table back as a data frame
    si.get_quote_table("aapl", dict_result=False)

    # or get it back as a dictionary (default)
    si.get_quote_table("aapl")

    # get top cryptocurrencies
    si.get_top_crypto()

    # get most active stocks on the day
    si.get_day_most_active()

    # get biggest gainers
    si.get_day_gainers()

    # get worst performers
    si.get_day_losers()
示例#3
0
def get_day_most_active(count=None):
    """Return ticker symbols from Yahoo's "most active" table.

    Args:
        count: maximum number of symbols to return; ``None`` returns
            every row. Non-positive values return an empty list (this
            matches the original counter-based loop, which broke
            immediately for count <= 0).

    Returns:
        list of values from the table's first column (ticker symbols).
    """
    symbols = stock_info.get_day_most_active().iloc[:, 0]
    if count is None:
        return list(symbols)
    # max(count, 0): a negative slice bound would otherwise drop rows
    # from the end instead of returning [].
    return list(symbols[:max(count, 0)])
示例#4
0
def most_active():
    """Summarize the top three most-active stocks as a 3-line report.

    Returns:
        A string with one line each for the tickers, their formatted
        dollar prices, and their percentage moves.
    """
    active = get_day_most_active()
    symbols = active['Symbol']
    prices = active['Price (Intraday)']
    changes = active['% Change']

    # Format the top three rows in one loop instead of three
    # copy-pasted stanzas (the original duplicated this logic per rank).
    ticks, price_strs, change_strs = [], [], []
    for i in range(3):
        ticks.append(symbols[i])
        price_strs.append('${:,.2f}'.format(prices[i]))
        # '% Change' is a percentage value, so divide by 100 before
        # applying the percent format.
        change_strs.append('{:.2%}'.format(changes[i] / 100))

    tickers = (f"The most active stocks are {ticks[0]}, {ticks[1]}, "
               f"and {ticks[2]}.\n")
    prices_line = (f"Their prices are {price_strs[0]}, {price_strs[1]}, "
                   f"and {price_strs[2]}.\n")
    change_line = (f"They moved by {change_strs[0]}, {change_strs[1]}, "
                   f"and {change_strs[2]}.")
    return tickers + prices_line + change_line
示例#5
0
def get_top_symbols():
    """Print and return the symbols of the 15 most-active stocks today."""
    most_active = si.get_day_most_active()
    top_symbols = most_active['Symbol'].tolist()[:15]

    print(top_symbols)
    return top_symbols
    def post_market(self):
        """Fetch the day's Yahoo market tables and post each one.

        Downloads gainers, losers, top crypto, and most-active tables,
        then posts each under the "Market" node keyed by today's date,
        sleeping briefly between posts.
        """
        root = {}
        root["market"] = {}

        # Market node: snapshot of the main Yahoo Finance summary tables.
        day_gainers = si.get_day_gainers().to_dict(orient="dict")
        day_losers = si.get_day_losers().to_dict(orient="dict")
        top_crypto = si.get_top_crypto().to_dict(orient="dict")
        most_active = si.get_day_most_active().to_dict(orient="dict")

        new_node = {"Day_Gainers": day_gainers,
                    "Day_Losers": day_losers,
                    "Top_Crypto": top_crypto,
                    "Most_Active": most_active}

        for node_key in new_node:
            # Recompute today's date per post (kept inside the loop as in
            # the original, in case the loop straddles midnight).
            key_var = str(datetime.date(datetime.now()))
            self.post(key_var, new_node[node_key], "Market", node_key)

            # Throttle posts slightly.
            time.sleep(.5)

        print('Success\n')  # fixed message typo ("Sucess")
示例#7
0
def submit():
    """Handle the export form: stream the selected Yahoo table as CSV.

    Depending on which button was pressed, returns the day's top
    gainers, top losers, or most-active stocks as a CSV attachment
    named "<Kind>-<YYYY-MM-DD>.csv". Returns None for GET requests or
    unrecognized buttons (as before).
    """

    def _csv_response(frame, prefix):
        # Single place that builds the dated filename and the CSV
        # attachment response (the three branches previously duplicated
        # this, and the MostActives branch appended ".csv" twice).
        file_name = prefix + "-" + str(datetime.today()).split()[0] + '.csv'
        return Response(
            frame.to_csv(),
            mimetype="text/csv",
            headers={"Content-disposition":
                         f"attachment; filename={file_name}"})

    if request.method == 'POST':
        if request.form.get("TopGainers"):
            return _csv_response(si.get_day_gainers(), "TopGainers")
        elif request.form.get("TopLosers"):
            return _csv_response(si.get_day_losers(), "TopLosers")
        elif request.form.get("MostActives"):
            return _csv_response(si.get_day_most_active(), "MostActives")
def getDayMovers():
    """Populate the gainers/losers/most-active list boxes.

    Fetches the three Yahoo Finance mover tables, formats the price and
    percent-change columns, builds a colon-joined "SYM:$price:change%"
    display string per row, and pushes the joined rows into the tk
    StringVars (`gainers`, `losers`, `activity`).
    """
    global gainers, losers, activity, gainerdata, loserdata, activedata

    def _prepare(frame, change_prefix=""):
        # Keep the three display columns, format them, and add the
        # combined display string. .copy() avoids writing into a view.
        data = frame[['Symbol', 'Price (Intraday)', '% Change']].copy()
        data['Price (Intraday)'] = "$" + data['Price (Intraday)'].astype(str)
        data['% Change'] = change_prefix + data['% Change'].astype(str) + "%"
        data['Combined'] = data.apply(
            lambda x: ':'.join(x.dropna().astype(str)),
            axis=1
        )
        return data

    # Gainers get an explicit "+" sign; losers/actives keep the raw
    # value (losers already carry a "-").
    gainerdata = _prepare(si.get_day_gainers(), change_prefix="+")
    loserdata = _prepare(si.get_day_losers())
    activedata = _prepare(si.get_day_most_active())

    # sets the list box data
    gainers.set('\n'.join(gainerdata['Combined']))
    losers.set('\n'.join(loserdata['Combined']))
    activity.set('\n'.join(activedata['Combined']))
示例#9
0
def dayMostActive():
    """Render the day's most-active stocks, or an error page on failure.

    Returns:
        Rendered "daymostactive.html" with the most-active table, or
        "error.html" if the Yahoo Finance fetch or rendering fails.
    """
    try:
        most_active = si.get_day_most_active()
        print("OK")
        return render_template("daymostactive.html", data=most_active)
    except Exception:
        # Catch Exception instead of a bare except so SystemExit /
        # KeyboardInterrupt still propagate.
        print("Error")
        return render_template("error.html")
示例#10
0
def get_most_active_tickers(ticker=''):
    """Return the day's most-active stocks, sorted by descending volume.

    With an empty ticker, returns the summary columns for every row;
    otherwise returns the price / change / volume columns for the rows
    matching that symbol.
    """
    ranked = si.get_day_most_active().sort_values(by='Volume',
                                                  ascending=False)
    if ticker:
        matching = ranked[ranked.Symbol == ticker]
        return matching[['Price (Intraday)', '% Change', 'Volume']]
    return ranked[['Symbol', 'Price (Intraday)', '% Change', 'Volume',
                   'Avg Vol (3 month)']]
示例#11
0
def mostactive():
    """Returns: stock with the most activity in a given day and
    its stock price with format: 'TICKER': 'PRICE'."""
    top_symbol = si.get_day_most_active().at[0, 'Symbol']
    # roundNumber is a module-level precision setting defined elsewhere.
    live_price = round(si.get_live_price(top_symbol), roundNumber)
    return f"{top_symbol}: {live_price}"
示例#12
0
def Daily_info():
    """Return today's top-5 gainers, losers, and most-active stocks.

    Returns:
        A (winners, losers, most_active) tuple of 5-row DataFrames.
    """
    winners = yf.get_day_gainers().sort_values(
        by='% Change', ascending=False).head(5)
    worst = yf.get_day_losers().sort_values(
        by='% Change', ascending=True).head(5)
    busiest = yf.get_day_most_active().head(5)
    return winners, worst, busiest
示例#13
0
def get_active_tickers():
    """Return a JSON array of the 10 most-active ticker symbols."""
    active_tickers = si.get_day_most_active()
    top_ten = [str(active_tickers['Symbol'][idx]) for idx in range(10)]
    return jsonify(top_ten)
示例#14
0
    def topact(self):
        """Refresh the list box with the 25 most-active stocks."""
        table = si.get_day_most_active()

        # Clear any previous rows before inserting today's data.
        self.listBox.delete(*self.listBox.get_children())
        for row in range(25):
            row_values = (table.loc[row, 'Symbol'],
                          table.loc[row, 'Name'],
                          '$' + str(table.loc[row, 'Price (Intraday)']),
                          table.loc[row, 'Change'],
                          table.loc[row, '% Change'])
            self.listBox.insert("", "end", values=row_values)
示例#15
0
def active(update, context):
    """Send the top-10 most-active stocks and their TTM column via the bot."""
    table = si.get_day_most_active()
    output = "Stock Code \t\t\t TTM\n\n"
    for rank in range(10):
        ttm = table.iloc[rank, -1]
        # NaN entries in the last (TTM) column are shown as a placeholder.
        ttm_val = "---" if math.isnan(ttm) else str(format(ttm, '.2f'))
        output += str(rank + 1) + ". " + str(
            table.iloc[rank, 0]).upper() + " \t\t\t " + ttm_val + ' \n'
    print(output)
    context.bot.send_message(chat_id=update.effective_chat.id, text=output)
示例#16
0
    def Download_pulse(self):
        """Download the day's most-active / gainer / loser tables.

        Stores each table in ``self.pulse_result`` under the keys
        'most_active', 'gainer', and 'loser'.
        """
        self.pulse_result = {}

        # Patch the event loop only when one is already running
        # (i.e. running in Notebook, Spyder, etc).
        if asyncio.get_event_loop().is_running():
            import nest_asyncio
            nest_asyncio.apply()

        # Snapshot the three market-pulse tables from Yahoo Finance.
        self.pulse_result['most_active'] = si.get_day_most_active()
        self.pulse_result['gainer'] = si.get_day_gainers()
        self.pulse_result['loser'] = si.get_day_losers()
        print('_____________Market Pulse Download Done________________')
# Imports
from pandas_datareader import data as pdr
from yahoo_fin import stock_info as si
from pandas import ExcelWriter
import yfinance as yf
import pandas as pd
import datetime
import time
# Route pandas_datareader's Yahoo requests through yfinance.
yf.pdr_override()

# Variables
# tickers = si.tickers_sp500()
# NOTE(review): despite the name, this holds the full "most active"
# table returned by yahoo_fin, not a plain list of ticker symbols.
tickers = si.get_day_most_active()
print(tickers)
示例#18
0
import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import yahoo_fin.stock_info as ya
from alpha_vantage.sectorperformance import SectorPerformances
pd.set_option('display.max_columns', None)

# Day's most-active stocks, restricted to non-negative price moves.
movers = ya.get_day_most_active()
movers = movers[movers['% Change'] >= 0]

# Scrape the 30-day sentiment table from sentdex.com.
res = requests.get('http://www.sentdex.com/financial-analysis/?tf=30d')
soup = BeautifulSoup(res.text, features="lxml")
table = soup.find_all('tr')

stock = []
sentiment = []
mentions = []
sentiment_trend = []

# Pull the symbol (cell 0) and sentiment (cell 3) from each table row;
# rows with missing cells append None so the lists stay aligned.
for ticker in table:
    ticker_info = ticker.find_all('td')
    try:
        stock.append(ticker_info[0].get_text())
    except:
        stock.append(None)
    try:
        sentiment.append(ticker_info[3].get_text())
    except:
        sentiment.append(None)
示例#19
0
def get_active():
    """Return the Symbol column of today's most-active stocks."""
    return si.get_day_most_active()['Symbol']
示例#20
0
async def day_day_most_active(ctx):
    """Reply in the invoking channel with the day's most-active table."""
    most_active_table = si.get_day_most_active()
    await ctx.send(most_active_table)
async def on_message(message):
    """Dispatch "!" chat commands to market-data replies.

    Supported commands: "!day gain|lose|active", "!crypto", "!help",
    "!calls TICKER DATE", "!puts TICKER DATE". Any other "!word" is
    treated as a ticker symbol and answered with an embedded quote
    card. Messages without "!" are ignored.
    """
    print(message.content)
    if message.content.find("!") != -1:
        content = message.content.strip('!').lower().split()
        print(content)
        print(content[0])

        if content[0] == "day":
            # Tables are sent in two halves because of message limits.
            if content[1] == "gain":
                await message.channel.send(
                    si.get_day_gainers().head(10).iloc[:, :3])
                await message.channel.send(
                    "-------------------------------------------")
                await message.channel.send(
                    si.get_day_gainers().head(10).iloc[:, 4:7])
            elif content[1] == "lose":
                await message.channel.send(
                    si.get_day_losers().head(10).iloc[:, :3])
                await message.channel.send(
                    "-------------------------------------------")
                await message.channel.send(
                    si.get_day_losers().head(10).iloc[:, 4:7])
            elif content[1] == "active":
                await message.channel.send(
                    si.get_day_most_active().head(10).iloc[:, :3])
                await message.channel.send(
                    "-------------------------------------------")
                await message.channel.send(
                    si.get_day_most_active().head(10).iloc[:, 4:7])

        elif content[0] == "crypto":
            await message.channel.send(si.get_top_crypto().head(10).iloc[:, :3]
                                       )
            await message.channel.send(
                "-------------------------------------------")
            await message.channel.send(si.get_top_crypto().head(10).iloc[:,
                                                                         4:5])

        elif content[0] == "help":
            embedVar = discord.Embed(title="List of functioning commands",
                                     description="",
                                     colour=0x00ff00)
            embedVar.add_field(name="\u200b",
                               value="!tsla\n!day gain\n!day loss",
                               inline=True)
            embedVar.add_field(
                name="\u200b",
                value="!calls tlsa 03/19/2021\n!puts tlsa 03/19/2021",
                inline=True)
            await message.channel.send(embed=embedVar)

        elif content[0] == "calls":
            await message.channel.send(
                op.get_calls(content[1], content[2]).iloc[:, 2:8])

        elif content[0] == "puts":
            await message.channel.send(
                op.get_puts(content[1], content[2]).iloc[:, 2:8])

        else:
            # Anything else is treated as a ticker: build a quote embed.
            temp = si.get_quote_table(content[0])
            change = round(temp["Quote Price"] - temp["Previous Close"], 2)
            percentage = round(change / temp["Previous Close"] * 100, 2)

            displayQuote = str(round(temp["Quote Price"], 2))
            displayChange = str(change)
            displayPercentage = str(percentage)
            displayTicker = content[0].upper()
            displayClose = str(round(temp["Previous Close"], 2))

            dayRange = temp["Day's Range"].replace('-', '').split()

            dayLow = dayRange[0]
            dayHigh = dayRange[1]

            # Renamed from "open"/"close": "open" shadowed the builtin.
            open_price = temp["Open"]
            prev_close = temp["Previous Close"]

            volume = str(round(temp["Volume"] / 1000000, 2))
            volume = volume + "M"

            avgVolume = str(round(temp["Avg. Volume"] / 1000000, 2))
            avgVolume = avgVolume + "M"

            bid = temp["Bid"]
            ask = temp["Ask"]

            # Green embed with explicit "+" signs for gains, red for losses.
            if change >= 0:
                rgb = 0x00ff00
                displayChange = "+" + displayChange
                displayPercentage = "+" + displayPercentage
            else:
                rgb = 0xff0000

            embedVar = discord.Embed(
                title=
                f"${displayTicker}\n${displayQuote} {displayChange} ({displayPercentage}%)",
                description="",
                colour=rgb)
            embedVar.add_field(
                name="\u200b",
                value=
                f"High: {dayHigh}\nLow: {dayLow}\n\nAsk: {ask}\nBid: {bid}",
                inline=True)
            embedVar.add_field(name="\u200b",
                               value=f"Open: {open_price}\nPrev.: {prev_close}",
                               inline=True)
            embedVar.add_field(
                name="\u200b",
                value=f"Volume: {volume}\nAvg. Vol.: {avgVolume}",
                inline=True)

            await message.channel.send(embed=embedVar)
示例#22
0
########################################################################################

# import stock_info module from yahoo_fin
from yahoo_fin import stock_info as si

# get live price of Apple
si.get_live_price("aapl")

# or Amazon
si.get_live_price("amzn")

# or any other ticker (previously an undefined name, which raised
# NameError at this point in the script)
ticker = "msft"
si.get_live_price(ticker)

# get quote table back as a data frame
si.get_quote_table("aapl", dict_result=False)

# or get it back as a dictionary (default)
si.get_quote_table("aapl")

# get most active stocks on the day
si.get_day_most_active()

# get biggest gainers
si.get_day_gainers()

# get worst performers
si.get_day_losers()

############################################################################################
def main():
    """Streamlit entry point: market snapshot plus Prophet forecasts.

    "Home" shows today's most-active / gainer / loser tables;
    "Stock Prediction using ML" downloads price history for a typed
    ticker and plots a Prophet forecast for a chosen horizon.
    """

    st.title("Welcome to Predict Future of Stocks.")

    menu = ["Home", "Stock Prediction using ML"]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == "Home":
        st.subheader("Recommendations")

        # Fetch each Yahoo table once; the original called every
        # endpoint twice and discarded the first result.
        st.subheader("Today's Most Active Users")
        st.write(si.get_day_most_active())

        st.subheader("Today's Top Gainers")
        st.write(si.get_day_gainers())

        st.subheader("Today's Top Losers")
        st.write(si.get_day_losers())

    elif choice == "Stock Prediction using ML":
        st.subheader("Stock Prediction using ML")

        START = "2015-01-01"
        TODAY = date.today().strftime("%Y-%m-%d")

        selected_stock = st.text_input("Type Stocks's name...")

        submit = st.button('Search')
        if submit:

            # Single fetch per endpoint (previously fetched twice).
            st.write("Live Price : ", si.get_live_price(selected_stock))
            st.write("Market state : ", si.get_market_status())

            n_years = st.slider("Years of prediction:", 1, 10)
            period = n_years * 365

            def load_data(ticker):
                # Daily bars from START to today, Date as a column.
                data = yf.download(ticker, START, TODAY)
                data.reset_index(inplace=True)
                return data

            data_load_state = st.text('Loading data...')
            data = load_data(selected_stock)
            data_load_state.text('Loading data... done!')

            st.subheader('Raw data')
            st.write(data.tail())

            # Plot raw data

            def plot_raw_data():
                # Open/close traces with an x-axis range slider.
                fig = go.Figure()
                fig.add_trace(
                    go.Scatter(x=data['Date'],
                               y=data['Open'],
                               name="stock_open"))
                fig.add_trace(
                    go.Scatter(x=data['Date'],
                               y=data['Close'],
                               name="stock_close"))
                fig.layout.update(
                    title_text='Time Series data with Rangeslider',
                    xaxis_rangeslider_visible=True)
                st.plotly_chart(fig)

            plot_raw_data()

            # Predict forecast with Prophet (expects 'ds'/'y' columns).
            df_train = data[['Date', 'Close']]
            df_train = df_train.rename(columns={"Date": "ds", "Close": "y"})

            m = Prophet()
            m.fit(df_train)
            future = m.make_future_dataframe(periods=period)
            forecast = m.predict(future)

            # Show and plot forecast
            st.subheader('Forecast data')
            st.write(forecast.tail())

            st.write(f'Forecast plot for {n_years} years')
            fig1 = plot_plotly(m, forecast)
            st.plotly_chart(fig1)

            st.write("Forecast components")
            fig2 = m.plot_components(forecast)
            st.write(fig2)
示例#24
0
import yfinance as yf
from yahoo_fin import stock_info as si
import re

# def getData(ticker):

#     while True:
#         data = yf.Ticker(ticker)
#         print("Current Price: " + str(data.info['regularMarketPrice']))
#         print("Market Open: " + str(data.info['regularMarketOpen']))
#         print("Market Close: " + str(data.info['previousClose']))
#         print("50 Day Average: " + str(data.info['fiftyDayAverage']))
#         print("200 Day Average: " + str(data.info['twoHundredDayAverage']))
#         print(data.info)


#getData('GME')
# Interactive CLI: prompt for a ticker, then print its live price and
# the day's mover tables from Yahoo Finance.
print("Enter stock ticker: ")
ticker = input()
print("Current Price: " + str(si.get_live_price(ticker)))
# get most active stocks on the day
print("Most Active:\n " + str(si.get_day_most_active()))
# get biggest gainers
print("Biggest Gainers:\n " + str(si.get_day_gainers()))
# get worst performers
print("Worst performers:\n " + str(si.get_day_losers()))




 
示例#25
0
from yahoo_fin.stock_info import get_data, get_day_most_active
import pandas as pd

# NOTE(review): named "weekly" but interval="1mo" requests monthly
# bars — confirm which was intended.
nio_weekly = get_data("NIO",
                      start_date="12/21/2020",
                      end_date="01/23/2021",
                      index_as_date=True,
                      interval="1mo")
print(nio_weekly)

# Day's most-active table (market-wide, not NIO-specific despite the name).
nio_active = get_day_most_active()
print(nio_active)
示例#26
0
def overview(request):
    """Portfolio overview page: refresh holdings P&L and show movers.

    Ensures a Profile exists for the user's email, recomputes live
    price / profit / % change for every bought stock, then renders the
    overview template with the day's top-5 most-active, gainer, and
    loser tables. Logged-out users are sent to the login page.
    """
    # Create a Profile for this email on first visit.
    if not Profile.objects.filter(email=request.user.email).exists():
        ins = Profile(email=request.user.email)

        ins.save()
    if request.user.is_authenticated:
        if (Buystock.objects.filter(username=request.user.username).exists()):
            all = Buystock.objects.filter(username=request.user.username)
            for i in all:
                from yahoo_fin import stock_info as si
                # Live quote for each held ticker.
                r = si.get_live_price(i.ticker)

                s = Stockd.objects.get(ticker=i.ticker,
                                       username=request.user.username)
                s.lastprice = r
                # change starts as (last/bought)*100 and is adjusted
                # below into a gain/loss percentage.
                s.change = (s.lastprice / s.price) * 100
                s.save()
                if (s.lastprice > s.price):
                    s.type = 'Profit'
                    s.profit = s.quantity * (s.lastprice - s.price)
                    s.change -= 100
                elif (s.lastprice < s.price):
                    s.type = 'Loss'
                    s.profit = s.quantity * (s.price - s.lastprice)
                    s.change = 100 - s.change
                else:
                    s.type = 'Stable'
                    s.profit = 0.00
                    s.change = 0.00
                s.profit = round(s.profit, 2)
                print(s.profit)
                s.save()
        else:
            messages.warning(request, "Till Now No Stocks are bought")
        from yahoo_fin import stock_info as si
        # Top-5 most-active stocks, serialized to template-friendly dicts.
        r = si.get_day_most_active().head(5)
        r.rename(columns={
            'Market Cap': 'Market',
            'Price (Intraday)': 'Price'
        },
                 inplace=True)
        json_records = r.reset_index().to_json(orient='records')
        data = []
        data = json.loads(json_records)
        # Top-5 gainers.
        g = si.get_day_gainers().head(5)
        g.rename(columns={
            'Market Cap': 'Market',
            'Price (Intraday)': 'Price',
            '% Change': 'change'
        },
                 inplace=True)
        json_records = g.reset_index().to_json(orient='records')
        datag = []
        datag = json.loads(json_records)

        # Top-5 losers.
        lost = si.get_day_losers().head(5)
        lost.rename(columns={
            'Market Cap': 'Market',
            'Price (Intraday)': 'Price',
            '% Change': 'change'
        },
                    inplace=True)
        json_records = lost.reset_index().to_json(orient='records')
        datal = []
        datal = json.loads(json_records)

        # Include holdings ('d') only when the user actually owns stocks.
        if Stockd.objects.filter(username=request.user.username).exists(
        ) and Buystock.objects.filter(username=request.user.username).exists():
            st = Stockd.objects.filter(username=request.user.username)
            po = Profile.objects.get(email=request.user.email)
            return render(request, 'Stock/overview.html', {
                'd': st,
                'r': data,
                'g': datag,
                'l': datal,
                'pho': po
            })
        else:
            po = Profile.objects.get(email=request.user.email)
            return render(request, 'Stock/overview.html', {
                'r': data,
                'g': datag,
                'l': datal,
                'pho': po
            })
    else:
        auth.logout(request)
        return render(request, 'login/login.html')
示例#27
0
import matplotlib.pyplot as plt
from datetime import date
import yfinance as yf
import pandas as pd
from yahoo_fin import stock_info as si
import mplcursors

symbol1 = "AAPL"
stockOne = yf.Ticker(symbol1)

# One trading day of 1-minute bars; keep the opening prices and format
# the index as HH:MM:SS for plotting.
dailyData = pd.DataFrame(stockOne.history(period="1d", interval="1m"))['Open']
dailyData.index = dailyData.index.strftime("%H:%M:%S")

# Fetch each Yahoo mover table ONCE and reuse it (the original fetched
# every table twice over the network, which also risked the index rows
# coming from a different snapshot than the data rows).
gainers_raw = pd.DataFrame(si.get_day_gainers())
gainers = gainers_raw[['Price (Intraday)', '% Change']]
gainers.index = gainers_raw['Symbol']

losers_raw = pd.DataFrame(si.get_day_losers())
losers = losers_raw[['Price (Intraday)', '% Change']]
losers.index = losers_raw['Symbol']

active_raw = pd.DataFrame(si.get_day_most_active())
active = active_raw[['Price (Intraday)', '% Change']]
active.index = active_raw['Symbol']

print(pd.DataFrame(si.tickers_dow()))
def stockmarket(tickertxt):

    movers = ya.get_day_most_active()
    st.table(movers.head())

    # Right away we notice that stocks with negative price changes are also included in our results. A filter to get only stocks with a positive % change is applied to get our desired stocks

    # In[58]:

    movers = movers[movers['% Change'] >= 0]
    st.table(movers.head())

    # Excellent! We have successfully scraped the data using the yahoo_fin python module. it is often a good idea to see if those stocks are also generating attention, and what kind of attention it is to avoid getting into false rallies. We will scrap some sentiment data courtesty of [sentdex](http://www.sentdex.com/financial-analysis/). Sometimes sentiments may lag due to source e.g Newsarticle published an hour after event, so we will also utilize [tradefollowers](https://www.tradefollowers.com/strength/twitter_strongest.jsp?tf=1d) for their twitter sentiment data. We will process both lists independently and combine them. For both the sentdex and tradefollowers data we use a 30 day time period. Using a single day might be great for day trading but increases probability of jumping on false rallies.
    #
    # NOTE: Sentdex only has stocks which belong to the S&P 500

    # In[59]:

    res = requests.get('http://www.sentdex.com/financial-analysis/?tf=30d')
    soup = BeautifulSoup(res.text)
    table = soup.find_all('tr')

    # In[60]:

    stock = []
    sentiment = []
    mentions = []
    sentiment_trend = []

    for ticker in table:
        ticker_info = ticker.find_all('td')

        try:
            stock.append(ticker_info[0].get_text())
        except:
            stock.append(None)
        try:
            sentiment.append(ticker_info[3].get_text())
        except:
            sentiment.append(None)
        try:
            mentions.append(ticker_info[2].get_text())
        except:
            mentions.append(None)
        try:
            if (ticker_info[4].find(
                    'span', {"class": "glyphicon glyphicon-chevron-up"})):
                sentiment_trend.append('up')
            else:
                sentiment_trend.append('down')
        except:
            sentiment_trend.append(None)

    company_info = pd.DataFrame(
        data={
            'Symbol': stock,
            'Sentiment': sentiment,
            'direction': sentiment_trend,
            'Mentions': mentions
        })

    st.table(company_info.head(50))

    # We then combine these results with our results from the biggest movers on a given day. This done using a left join of this data frame with the original movers data frame

    # In[61]:

    top_stocks = movers.merge(company_info, on='Symbol', how='left')
    top_stocks.drop(['Market Cap', 'PE Ratio (TTM)'], axis=1, inplace=True)
    st.table(top_stocks.head(50))

    # A couple of stocks pop up with both very good sentiments and an upwards trend in favourability. ZNGA, TWTR and AES for instance stood out as potentially good picks. Note, the mentions here refer to the number of times the stock was referenced according to the internal metrics used by [sentdex](sentdex.com). Let's attempt supplimenting this information with some data based on twitter. We get stocks that showed the strongest twitter sentiments with a time period of 1 month

    # In[62]:

    res = requests.get(
        "https://www.tradefollowers.com/strength/twitter_strongest.jsp?tf=1m")
    soup = BeautifulSoup(res.text)

    stock_twitter = soup.find_all('tr')

    # In[63]:

    twit_stock = []
    sector = []
    twit_score = []

    for stock in stock_twitter:
        try:
            score = stock.find_all("td", {"class": "datalistcolumn"})
            twit_stock.append(score[0].get_text().replace('$', '').strip())
            sector.append(score[2].get_text().replace('\n', '').strip())
            twit_score.append(score[4].get_text().replace('\n', '').strip())
        except:
            twit_stock.append(np.nan)
            sector.append(np.nan)
            twit_score.append(np.nan)

    twitter_df = pd.DataFrame({
        'Symbol': twit_stock,
        'Sector': sector,
        'Twit_Bull_score': twit_score
    })

    # Remove NA values
    twitter_df.dropna(inplace=True)
    twitter_df.drop_duplicates(subset="Symbol", keep='first', inplace=True)
    twitter_df.reset_index(drop=True, inplace=True)
    st.table(twitter_df.head())

    # Twit_Bull_score refers to the internally scoring used at [tradefollowers](tradefollowers.com) to rank stocks based on twitter sentiments, and can range from 1 to as high as 10,000 or greater. With the twitter sentiments obtains, we combine it with our sentiment data to get an overall idea of the data.

    # In[64]:

    st.text("Final List")
    Final_list = top_stocks.merge(twitter_df, on='Symbol', how='left')
    st.table(Final_list)

    # Finally, we include a twitter momentum score.

    # In[65]:

    res2 = requests.get(
        "https://www.tradefollowers.com/active/twitter_active.jsp?tf=1m")
    soup2 = BeautifulSoup(res2.text)

    stock_twitter2 = soup2.find_all('tr')

    # In[66]:

    twit_stock2 = []
    sector2 = []
    twit_score2 = []

    for stock in stock_twitter2:
        try:
            score2 = stock.find_all("td", {"class": "datalistcolumn"})

            twit_stock2.append(score2[0].get_text().replace('$', '').strip())
            sector2.append(score2[2].get_text().replace('\n', '').strip())
            twit_score2.append(score2[4].get_text().replace('\n', '').strip())
        except:
            twit_stock2.append(np.nan)
            sector2.append(np.nan)
            twit_score2.append(np.nan)

    twitter_df2 = pd.DataFrame({
        'Symbol': twit_stock2,
        'Sector': sector2,
        'Twit_mom': twit_score2
    })

    # Remove NA values
    st.text("Final List mit twitter")

    twitter_df2.dropna(inplace=True)
    twitter_df2.drop_duplicates(subset="Symbol", keep='first', inplace=True)
    twitter_df2.reset_index(drop=True, inplace=True)
    st.table(twitter_df2.head(50))

    # We again combine the dataframes to earlier concatanated dataframes. This will form our recommender list

    # In[67]:

    st.text("Final List Recommandet")

    Recommender_list = Final_list.merge(twitter_df2, on='Symbol', how='left')
    Recommender_list.drop(['Volume', 'Avg Vol (3 month)'],
                          axis=1,
                          inplace=True)
    st.table(Recommender_list.head(50))

    # Our list now contains even more informationt to help us with our trades. Stocks which it suggests might generate positive returns include TSLA, ZNGA and TWTR. There is also the posibility that we do not get a stock that falls in all our generated lists, so usage of, for instance, the price information and the twitter data could still give us a good idea of what to expect in terms of performance. As an added measure, we can also obtain information on the sectors to see how they've performed. Again, we will use a one month time period for comparison. The aforementioned stocks belong to the Technology and consumer staples sectors.

    # In[68]:

    sp = SectorPerformances(key='ZQ5ATHRTMUO7YUKR', output_format='pandas')
    time.sleep(10)
    plt.figure(figsize=(8, 8))
    data, meta_data = sp.get_sector()
    st.text(meta_data)
    data['Rank D: Month Performance'].plot(kind='bar')
    plt.title('One Month Performance (%) per Sector')
    plt.tight_layout()
    plt.grid()
    st.pyplot(plt, use_container_width=True)
    #plt.show()

    # The industrials sector appears to be the best performing in this time period. Consumer staples appears to be doing better than IT, but overall they are up which bodes well for potential investors. Please note that this analysis is only a guide to find potentially positive return generating stocks. It is still up to the investor to do the research.

    # ## Part 2: Forecasting using an LSTM
    #
    # In this section, we will attempt to apply deep learning to a stock of our choosing to predict future prices. At the time this project was conceived, the stock AMD was selected as it experienced really high gains at the time.

    # First we obtain stock data for our chosen stock. Data from 2014 data up till August of 2020 was obtained for our analysis. Our data will be obtained from yahoo

    # In[69]:

    from datetime import datetime
    from datetime import date

    today = date.today()
    #today.replace("-",",")
    #print(today)

    # In[70]:

    start = datetime(2014, 12, 31)
    end = datetime(2021, 6, 3)
    #print(end)

    # In[71]:

    stock_dt = web.DataReader('AMD', 'yahoo', start, end)
    stock_dt.reset_index(inplace=True)
    st.table(stock_dt.head())

    # In[72]:

    st.table(stock_dt.tail())

    # ### Feature selection/engineering
    #
    # We add additional data that might potentially increase prediction accuracy. Here we use technical indicators.

    # In[73]:

    # Technical Indicators

    # RSI
    t_rsi = TechIndicators(key='ZQ5ATHRTMUO7YUKR', output_format='pandas')
    time.sleep(15)
    data_rsi, meta_data_rsi = t_rsi.get_rsi(symbol='AMD',
                                            interval='daily',
                                            time_period=9,
                                            series_type='open')

    # SMA
    t_sma = TechIndicators(key='ZQ5ATHRTMUO7YUKR', output_format='pandas')
    time.sleep(15)
    data_sma, meta_data_sma = t_sma.get_sma(symbol='AMD',
                                            interval='daily',
                                            time_period=9,
                                            series_type='open')

    #EMA
    t_ema = TechIndicators(key='ZQ5ATHRTMUO7YUKR', output_format='pandas')
    time.sleep(15)
    data_ema, meta_data_ema = t_ema.get_ema(symbol='AMD',
                                            interval='daily',
                                            time_period=9,
                                            series_type='open')

    # In[74]:

    #On Balance volume
    t_obv = TechIndicators(key='ZQ5ATHRTMUO7YUKR', output_format='pandas')
    time.sleep(15)
    data_obv, meta_data_obv = t_obv.get_obv(symbol='AMD', interval='daily')

    # Bollinger bands
    t_bbands = TechIndicators(key='ZQ5ATHRTMUO7YUKR', output_format='pandas')
    time.sleep(15)
    data_bbands, meta_data_bb = t_bbands.get_bbands(symbol='AMD',
                                                    interval='daily',
                                                    series_type='open',
                                                    time_period=9)

    # To learn more about technical indicators and how they are useful in stock analysis, I welcome you to explore [investopedia](https://www.investopedia.com/). Let's combine these indicators into a dataframe

    # In[75]:

    t_ind = pd.concat([data_ema, data_sma, data_rsi, data_obv, data_bbands],
                      axis=1)
    t_ind

    # We then extract the values for the time interval of choice

    # In[76]:

    t_ind = t_ind.loc[start:end].reset_index()

    # Now we combine them with our original dataframe containing price and volume information

    # In[77]:

    df_updated = pd.concat([stock_dt, t_ind], axis=1)
    df_updated.set_index('Date', drop=True, inplace=True)
    st.table(df_updated.tail(20))

    # Before we begin, it is often a good idea to visually inspect the stock data to have an idea of the price trend and volume information

    # In[78]:

    # In[79]:

    mpf.plot(df_updated.loc[datetime(2021, 5, 1):datetime(2021, 6, 3)],
             type='candle',
             style='yahoo',
             figsize=(8, 6),
             volume=True)

    # in the month of July, AMD experienced a massive price surge. Let's have a look at the data with the indicators included

    # In[80]:

    fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(12, 12))

    ax[0].plot(
        df_updated['Open'].loc[datetime(2021, 5, 1):datetime(2021, 6, 11)],
        'k',
        lw=2,
        label='Close')
    ax[0].plot(
        df_updated['EMA'].loc[datetime(2021, 5, 1):datetime(2021, 6, 11)],
        'r',
        lw=1.5,
        label='EMA')
    ax[0].plot(
        df_updated['SMA'].loc[datetime(2021, 5, 1):datetime(2021, 6, 11)],
        'b',
        lw=1.5,
        label='SMA')
    ax[0].plot(df_updated['Real Upper Band'].
               loc[datetime(2021, 5, 1):datetime(2021, 6, 11)],
               'g',
               lw=1.5,
               label='Boolinger band (upper)')
    ax[0].plot(df_updated['Real Lower Band'].
               loc[datetime(2021, 5, 1):datetime(2021, 6, 11)],
               'y',
               lw=1.5,
               label='Boolinger band (lower)')
    ax[0].set_ylabel('Closing price')

    ax[0].legend()

    temp = len(
        df_updated['RSI'].loc[datetime(2021, 5, 1):datetime(2021, 6, 11)])

    ax[1].plot(
        df_updated['RSI'].loc[datetime(2021, 5, 1):datetime(2021, 6, 11)],
        'g',
        lw=2,
        label='RSI')
    ax[1].plot(
        df_updated['RSI'].loc[datetime(2021, 5, 1):datetime(2021, 6, 11)].
        index, 70 * np.ones((temp, 1)).flatten(), 'k')
    ax[1].plot(
        df_updated['RSI'].loc[datetime(2021, 5, 1):datetime(2021, 6, 11)].
        index, 30 * np.ones((temp, 1)).flatten(), 'k')
    ax[1].set_ylabel('RSI')
    #ax[1].legend()

    ax[2].plot(
        df_updated['OBV'].loc[datetime(2021, 5, 1):datetime(2021, 6, 11)],
        'y',
        lw=2,
        label='OBV')
    ax[2].set_ylabel('On balance Volume')
    #ax[2].legend()
    ax[2].set_xlabel('Date')
    st.pyplot(fig)

    # Indicators give us an idea of the direction of future prices. For instance, the Exponential moving average (EMA) crossing the Simple moving average (SMA) might indicate a positive uptrend in price. RSI gives us an idea of how much the stock is being bought or sold. An RSI of 70 for instance might indicate an overbought stock, and tells us the price is very likely to go down in the future, while an RSI of 30 indicates an oversold stock and could potentially be a good buy point for a stock. On balance volume gives us the relative changes in volume, and can potentially identify true rallies or breakouts. Bollinger bands provide an idea of the volatility of the stock.
    #
    # We also want to take into account relative changes between trading days as they tend to be less volatile, and therefore a bit more stationary. We will take the difference between two consecutive days in this case.

    # In[81]:

    df_updated['Diff_Open'] = df_updated['Open'] - df_updated['Open'].shift(1)
    df_updated['Diff_Close'] = df_updated['Close'] - df_updated['Close'].shift(
        1)
    df_updated[
        'Diff-Volume'] = df_updated['Volume'] - df_updated['Volume'].shift(1)
    df_updated['Diff-High'] = df_updated['High'] - df_updated['High'].shift(1)
    df_updated['Diff-Low'] = df_updated['Low'] - df_updated['Low'].shift(1)
    df_updated['Diff-Close (forward)'] = np.where(
        df_updated['Close'].shift(-1) > df_updated['Close'], 1, -1)

    df_updated['High-Low'] = df_updated['High'] - df_updated['Low'].shift(1)
    df_updated['Open-Close'] = df_updated['Open'] - df_updated['Close'].shift(
        1)

    df_updated['Returns'] = df_updated['Open'].pct_change(1)

    # In[82]:

    st.table(df_updated.head())

    # The next step is to visualize how the features relate to each other. We employ a correlation matrix for this purpose

    # In[83]:

    df_updated.drop(['date', 'Real Middle Band', 'Adj Close'],
                    axis=1,
                    inplace=True)

    # In[84]:

    plt.figure(figsize=(12, 8))
    sns.heatmap(df_updated.corr())

    # The closing price has very strong correlations with some of the other price informations such as opening price, highs and lows.
    # On the other hands, the differential prices arn't as correlated. We want to limit the amount of colinearity in our system before running any machine learning routine. So feature selection is a must.

    # ### Feature Selection
    #
    # We utilize two means of feature selection in this section. Random forests and mutual information gain. Random forests are
    # very popular due to their relatively good accuracy, robustness as well as simplicity in terms of utilization. They can directly measure the impact of each feature on accuracy of the model and in essence give them a rank. Information gain on the other hand, calculates the reduction in entropy from transforming a dataset in some way. Mutual information gain essentially evaluates the gain of each variable in the context of the target variable.

    # In[85]:

    # ### Random forest regressor

    # In[88]:

    # Seperate the target variable from the features
    y = df_updated['Close'].iloc[1:].dropna()
    X = df_updated.drop(['Close'], axis=1).iloc[1:].dropna()
    #print("y-Band: ",y.count)
    #print("x-band: ",X.count)

    # In[89]:

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

    # In[90]:

    X_train.shape, y_train.shape

    # In[92]:

    feat = SelectFromModel(
        RandomForestRegressor(n_estimators=100, random_state=0, n_jobs=-1))
    feat.fit(X_train, y_train)
    feat.get_support()

    # In[93]:

    X_train.columns[feat.get_support()]

    # The regressor essentially selected the features that displayed good correlation with the Close price. However, although it selected the most important, we would like information on the information gain from each variable. An issue with using random forests is it tends to diminish the importance of other correlated variables and may lead to incorrect interpretation. However, it does help reduce overfitting

    # ### Mutual information gain

    # In[94]:

    # In[96]:

    mi = mutual_info_regression(X_train, y_train)
    mi = pd.Series(mi)
    mi.index = X_train.columns
    mi.sort_values(ascending=False, inplace=True)

    # In[97]:

    st.table(mi.head(50))

    # The results validate the results using the random forest regressor, but it appears some of the other variables also contribute
    # a decent amount of information. We will select values greater than 2 for our analysis.

    # In[98]:

    sel = SelectKBest(mutual_info_regression, k=8).fit(X_train, y_train)  #
    Features = X_train.columns[sel.get_support()]
    Features.values

    # ### Preprocessing
    #
    # In order to construct a Long short term memory neural network (LSTM), we need to understand its structure. Below is the design of a typical LSTM unit.  Data source: [Researchgate](https://www.researchgate.net/publication/334268507_Application_of_Long_Short-Term_Memory_LSTM_Neural_Network_for_Flood_Forecasting)

    # ![LSTM_structure.jpg](LSTM_structure.jpg)

    # As mentioned earlier, LSTM's are a special type of Recurrent neural networks (RNN). Recurrent neural networks (RNN) are a special type of neural network in which the output of a layer is fed back to the input layer multiple times in order to learn from the past data. Basically, the neural network is trying to learn data that follows a sequence. However, since the RNNs utilize past data, they can become computationally expensive due to storing large amouts of data in memory. The LSTM mitigates this issue, using gates. It has a cell state, and 3 gates; forget, imput and output gates.
    #
    # The cell state is essentially the memory of the network. It carries information throughtout the data sequence processing. Information is added or removed from this cell state using gates. Information from the previous hidden state and current input are combined and passed through a sigmoid function at the forget gate. The sigmoid function determines which data to keep or forget. The transformed values are then multipled by the current cell state.
    #
    # Next, the information from the previous hidden state combined with the input is passed through a sigmoid function to again determine important information, and also a tanh function to transform data between -1 and 1. This transformation helps with the stability of the network and helps deal with the vanishing/exploding gradient problem. These 2 outputs are multiplied together, and the output is added to the current cell state with the sigmoid function applied to it to give us our new cell state for the next time step.
    #
    # Finally, the information from the hidden state combined with the current input are combined and a sigmoid function applied to it. The new cell state is passed through a tanh function to transform the values and both outputs are multiplied to determine the new hidden state for the next time step.
    #
    # Now we have an idea of how the LSTM works, let's construct one. First we split our data into training and test set

    # In[99]:

    df_updated.reset_index(drop=True, inplace=True)

    train_size = int(len(df_updated) * 0.8)
    test_size = len(df_updated) - train_size

    # Make sure to omit the first row, contains NAN's
    train = df_updated.iloc[1:train_size]
    test = df_updated.iloc[train_size:]

    # In[100]:

    train.shape, test.shape

    # In[102]:

    # Extract the features
    total_features = list(Features.values)

    total_features.append('Close')
    total_features

    train = train[total_features]
    test = test[total_features]

    train.shape, test.shape

    # Before we proceed, it is important to scale the data. Scaling is done to ensure one set of features don't have more importance relative to the others. In addition, having values between 0 and 1 will help the neural network converge faster if at all it does. We apply different scalings to the test and training data to avoid leakage into our model.

    # In[103]:

    # Scale both features and target variables

    f_transformer = MinMaxScaler()  # Feature scaler
    targ_transformer = MinMaxScaler()  # Target scaler

    f_transformer = f_transformer.fit(train[Features].to_numpy())
    targ_transformer = targ_transformer.fit(train[['Close']])

    train.loc[:,
              Features] = f_transformer.transform(train[Features].to_numpy())
    train['Close'] = targ_transformer.transform(train[['Close']].to_numpy())

    test.loc[:, Features] = f_transformer.transform(test[Features].to_numpy())
    test['Close'] = targ_transformer.transform(test[['Close']].to_numpy())

    # In[104]:

    train.shape, test.shape

    # The figure below shows how the sequential data for an LSTM is constructed to be fed into the network. Data source: [Althelaya et al, 2018](https://ieeexplore.ieee.org/document/8355458)

    # ![LSTM_data_arrangement.PNG](attachment:LSTM_data_arrangement.PNG)

    # Basically, for data at time t, with a window size of N, the target feature will be the data point at time t, and the feature will be the data points [t-1, t-N]. We then sequentially move forward in time using this approach. We therefore need to format our data that way.

    # In[105]:

    # In[106]:

    time_steps = 10

    X_train_lstm, y_train_lstm = create_dataset(train.drop(['Close'], axis=1),
                                                train['Close'], time_steps)
    X_test_lstm, y_test_lstm = create_dataset(test.drop(['Close'], axis=1),
                                              test['Close'], time_steps)

    # In[108]:

    X_train_lstm.shape, y_train_lstm.shape

    # In[109]:

    X_test_lstm.shape, y_test_lstm.shape

    # ### Building LSTM model
    #
    # The new installment of tensorflow (Tensorflow 2.0) via keras has made implmentation of deep learning models much easier than in previous installments. We will apply a bidrectional LSTM as they have been shown to more effective in certain applications (see [Althelaya et al, 2018](https://ieeexplore.ieee.org/document/8355458)). This due to the fact that the network learns using both past and future data in 2 layers. Each layer performs the operations using reversed time steps to each other. The loss function in this case will be the mean squared error, and the adam optimizer with the default learning rate is applied.

    # In[110]:

    # In[111]:

    model = keras.Sequential()
    model.add(
        keras.layers.Bidirectional(
            keras.layers.LSTM(units=32,
                              input_shape=(X_train_lstm.shape[1],
                                           X_train_lstm.shape[2]))))

    model.add(keras.layers.Dropout(rate=0.2))
    model.add(keras.layers.Dense(units=1))

    # In[112]:

    model.compile(optimizer='adam', loss='mean_squared_error')

    # In[114]:

    history = model.fit(X_train_lstm,
                        y_train_lstm,
                        epochs=90,
                        batch_size=40,
                        validation_split=0.2,
                        shuffle=False,
                        verbose=1)

    # In[115]:

    test_loss = model.evaluate(X_test_lstm, y_test_lstm)

    # In[116]:

    # In[117]:

    plot_learningCurve(history, 90)

    # With each epoch, the validation loss is decreasing but in a bit of a stochastic manner. The training loss is fairly consistent throughout. There may be some overfitting in there, but you can always tune model parameters and explore the data more. Let's make some predictions on the test data just to see what's happening

    # In[118]:

    y_pred = model.predict(X_test_lstm)

    # We need to apply some inverse scaling to get back our original results.

    # In[119]:

    y_train_inv = targ_transformer.inverse_transform(
        y_train_lstm.reshape(1, -1))
    y_test_inv = targ_transformer.inverse_transform(y_test_lstm.reshape(1, -1))
    y_pred_inv = targ_transformer.inverse_transform(y_pred)

    # In[120]:

    plt.figure(figsize=(10, 10))
    plt.plot(np.arange(0, len(y_train_lstm)),
             y_train_inv.flatten(),
             'g',
             label="history")
    plt.plot(np.arange(len(y_train_lstm, ),
                       len(y_train_lstm) + len(y_test_lstm)),
             y_test_inv.flatten(),
             marker='.',
             label="true")
    plt.plot(np.arange(len(y_train_lstm),
                       len(y_train_lstm) + len(y_test_lstm)),
             y_pred_inv.flatten(),
             'r',
             label="prediction")
    plt.ylabel('Close Price')
    plt.xlabel('Time step')
    plt.legend()
    st.pyplot(plt, use_container_width=True)
    #plt.show();

    # At first glance we can see that the our predictions are not very great, we could define adjust our model parameters some more. However, they appear to be following the trends pretty well. Let's take a closer look

    # In[121]:

    plt.figure(figsize=(10, 10))
    plt.plot(np.arange(len(y_train_lstm[0:500], ),
                       len(y_train_lstm[0:500]) + len(y_test_lstm[0:500])),
             y_test_inv.flatten()[0:500],
             label="true")
    plt.plot(np.arange(len(y_train_lstm[0:500]),
                       len(y_train_lstm[0:500]) + len(y_test_lstm[0:500])),
             y_pred_inv.flatten()[0:500],
             'r',
             label="prediction")
    plt.ylabel('Close Price')
    plt.xlabel('Time Step')
    plt.legend()
    st.pyplot(plt, use_container_width=True)
    #plt.show();

    # Now it will become apparent why I did not use a large number of epochs to train my model. At first glance, we notice the LSTM has some implicit autocorrelation in its results since its predictions for a given day are very similar to those of the previous day. It essentially lags. It's basically showing that the best guess of the model is very similar to previous results. This should not be a surprising result; the stock market is influenced by a number of factors such as news, earnings reports, mergers etc. Therefore, it is a bit too chaotic and stochastic to be accurately modelled because it depends on so many factors, some of which can be sporadic, i.e. positive or negative news. Therefore in my opinion, this may not be the best way to predict stock prices. Of course with major advances in AI there might actually be a way, but I don't think the hedge funds will be sharing their methods anytime soon.

    # ## Part 3: Regression analysis

    # Of course we could still make an attempt to have an idea of what the possible price movements might be. In this case I will utilize the differential prices as there's less volatility compared to using absolute prices. Let's explore these relationships

    # In[122]:

    fig, ax = plt.subplots(nrows=3, ncols=2, figsize=(10, 10))

    ax[0, 0].scatter(df_updated['Open-Close'], df_updated['Diff_Close'], c='k')
    ax[0, 0].legend(['Open-Close'])
    ax[0, 0].set_ylabel('Diff-Close')

    ax[0, 1].scatter(df_updated['High-Low'], df_updated['Diff_Close'], c='k')
    ax[0, 1].legend(['High-Low'])
    ax[0, 1].set_ylabel('Diff-Close')

    ax[1, 0].scatter(df_updated['Diff_Open'], df_updated['Diff_Close'], c='k')
    ax[1, 0].legend(['Diff-Open'])
    ax[1, 0].set_ylabel('Diff-Close')

    ax[1, 1].scatter(df_updated['Diff-Low'], df_updated['Diff_Close'], c='k')
    ax[1, 1].legend(['Diff-Low'])
    ax[1, 1].set_ylabel('Diff-Close')

    ax[2, 0].scatter(df_updated['Diff-High'], df_updated['Diff_Close'], c='k')
    ax[2, 0].legend(['Diff-High'])
    ax[2, 0].set_ylabel('Diff-Close')

    ax[2, 1].scatter(df_updated['Open'], df_updated['Diff_Close'], c='k')
    ax[2, 1].legend(['Open'])
    ax[2, 1].set_ylabel('Diff-Close')

    st.pyplot(fig)

    # Above are a series of plots that show the relationship between different differential price measurements and the differential close. In this study, the differece relates to the difference between a value at time t and the previous day value at time t-1. The Differential high, differential low, differential high-low and differential open-close appear to have a linear relationship with the differential close. However, only the differential open-close would be useful in an analysis. This because on a given day (time t), we can not know what the highs or lows are before hand till the day ends. However, we know the open value at the start of the trading period.

    # Let's separate the data features and target variables. We will use Ridge regression in this case to make our model more generalizable

    # In[123]:

    # In[124]:

    X_reg = df_updated[['Open-Close']]
    y_reg = df_updated['Diff_Close']

    # In[125]:

    X_reg = X_reg.loc[1:, :]
    y_reg = y_reg.iloc[1:]

    # In[126]:

    X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
        X_reg, y_reg, test_size=0.2, random_state=0)

    # We will perform a grid search and cross validation to determine optimal paramters for our regresison model

    # In[127]:

    ridge = Ridge()
    alphas = [
        1e-15, 1e-8, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0, 1, 5, 10, 20, 30,
        40, 45, 50, 55, 100
    ]
    params = {'alpha': alphas}

    # In[129]:

    ridge_regressor = GridSearchCV(ridge,
                                   params,
                                   scoring='neg_mean_squared_error',
                                   cv=10)
    ridge_regressor.fit(X_reg, y_reg)

    # In[130]:

    st.text(ridge_regressor.best_score_)
    st.text(ridge_regressor.best_params_)

    # Finally, let's produce a plot and see how it fits

    # In[131]:

    np.shape(X_test_reg)

    # In[133]:

    regr = Ridge(alpha=1e-15)
    regr.fit(X_train_reg, y_train_reg)

    y_pred = regr.predict(X_test_reg)
    y_pred_train = regr.predict(X_train_reg)

    st.text(f'R^2 value for test set is {regr.score(X_test_reg,y_test_reg)}')
    st.text(f'Mean squared error is {mean_squared_error(y_test_reg,y_pred)}')

    plt.scatter(df_updated['Open-Close'][1:],
                df_updated['Diff_Close'][1:],
                c='k')
    plt.plot(df_updated['Open-Close'][1:],
             (regr.coef_[0] * df_updated['Open-Close'][1:] + regr.intercept_),
             c='r')
    plt.xlabel('Open-Close')
    plt.ylabel('Diff-Close')
    st.pyplot(plt, use_container_width=True)

    # We obtained a mean square error of 0.58 which is fairly moderate. Our R^2 value basically says 54% of the variance in the
    # differential close price is explained by the differential open-close price. Not so bad so far. But to be truly effective, we need to make use of statistics. Specifically, let's define a confidence interval around our predictions i.e prediction intervals.
    #
    # Prediction intervals give you a range for the prediction that accounts for any threshold of modeling error. Prediction intervals are most commonly used when making predictions or forecasts with a regression model, where a quantity is being predicted. We select the 95% confidence interval in this example such that our actual predictions fall into this range 99% of the time. For an in-depth overview and explanation please explore [machinelearningmastery](https://machinelearningmastery.com/prediction-intervals-for-machine-learning/)

    # In[135]:

    # In[136]:

    lower, upper, interval = predict_range(X_reg, y_reg, regr)

    # In[138]:

    plt.scatter(X_reg, df_updated['Diff_Close'][1:], c='k')
    plt.plot(X_reg, lower, c='b')
    plt.plot(X_reg,
             (regr.coef_[0] * df_updated['Open-Close'][1:] + regr.intercept_),
             c='r')
    plt.plot(X_reg, upper, c='g')

    #plt.errorbar(X_reg , (regr.coef_[0] * df_updated['Open-Close'][1:] + regr.intercept_),yerr=interval)
    #

    plt.xlabel('Open-Close')
    plt.ylabel('Diff-Close')
    plt.legend(['Upper bound', 'Model', 'Lower bound'])
    st.pyplot(plt, use_container_width=True)
示例#29
0
def get_most_active_stocks():
    """Return the day's most active stocks (top ~100) as a DataFrame.

    Thin wrapper around yahoo_fin's ``stock_info.get_day_most_active``;
    performs a network request to Yahoo Finance.
    """
    most_active = si.get_day_most_active()
    return most_active
示例#30
0
 def getMostActive(self):
     """Return the ten most active stocks of the day, formatted for display.

     Fetches the daily most-active table from Yahoo Finance, keeps the
     first ten rows, augments them with a relative volume-change column
     (computed by the injected ``yahooFinanceDataModification`` helper),
     and returns the helper's formatted representation.
     """
     # NOTE(review): assumes get_day_most_active() returns a DataFrame,
     # so positional row slicing is equivalent to the original [0:10].
     top_active = si.get_day_most_active().iloc[:10]
     helper = self.yahooFinanceDataModification
     top_active['volume_change'] = helper.getPercentigeChangeInVolume(top_active)
     return helper.formatTopGainersOrLoosersOrActive(top_active)