Example #1
def fetch_FA_data(ticker, start_date, end_date, fname):
    # Build the request from the function arguments instead of hard-coding
    # AAPL. NOTE: substitute your own Financial Modeling Prep API key.
    url = ("https://financialmodelingprep.com/api/v3/historical-price-full/"
           f"{ticker}?from={start_date}&to={end_date}"
           "&apikey=a6d00b2ab555b1e00ed8d26c51b79dcb")
    # The endpoint returns {"symbol": ..., "historical": [...]}; flatten the
    # "historical" list into a DataFrame (pandas imported as pd) for to_csv.
    data = pd.DataFrame(get_jsonparsed_data(url)["historical"])

    data.to_csv(fname, columns=["date", "open", "close",
                                "high", "low", "volume", "adjClose"], index=False)
Example #2
def build_features(data):
    # Feature name list
    predictor_names = []

    # Compute price difference as a feature
    data["diff"] = np.abs(
        (data["Close"] - data["Close"].shift(1)) / data["Close"]).fillna(0)
    predictor_names.append("diff")

    # Compute the volume difference as a feature
    data["v_diff"] = np.abs(
        (data["Volume"] - data["Volume"].shift(1)) / data["Volume"]).fillna(0)
    predictor_names.append("v_diff")

    for i in range(1, NUM_DAYS_BACK + 1):
        data["p_" + str(i)] = np.where(data["Close"] > data["Close"].shift(i),
                                       1, 0)  # i: number of look back days
        predictor_names.append("p_" + str(i))

    data["next_day"] = np.where(data["Close"].shift(-1) > data["Close"], 1, 0)
    data["next_day_opposite"] = np.where(data["next_day"] == 1, 0, 1)
    train_data = data["2015-01-20":"1990-02-05"]
    train_test = data["2018-01-20":"2015-01-25"]

    # If you want to save the file to your local drive
    data.to_csv("f_params.csv")
    return predictor_names, train_data, train_test
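A hedged usage sketch for build_features ("prices.csv" and the NUM_DAYS_BACK value are assumptions, not from the original): the function expects a DataFrame with "Close" and "Volume" columns on a DatetimeIndex sorted newest-first, matching the slice order above.

import numpy as np
import pandas as pd

NUM_DAYS_BACK = 14  # assumed look-back depth

df = pd.read_csv("prices.csv", index_col="Date", parse_dates=True)
df = df.sort_index(ascending=False)  # newest-first
predictors, train_data, test_data = build_features(df)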
Example #3
def fetch_tiingo_data(ticker, start_date, end_date, fname):
    # NOTE: substitute your own Tiingo API token below.
    url = "https://api.tiingo.com/tiingo/daily/{ticker}/prices?startDate={start_date}&endDate={end_date}&token={token}"
    token = "ca5a6f47a99ae61051e4de63b26f727b1709a01d"
    data = pd.read_json(url.format(
        ticker=ticker,
        start_date=start_date,
        end_date=end_date,
        token=token
    ))
    data.to_csv(fname, columns=["date", "open", "close",
                                "high", "low", "volume", "adjClose"], index=False)
Example #4
def main():
    # api_key, symbols and time_window are assumed to be module-level globals.
    ts = TimeSeries(key=api_key, output_format='pandas')
    for symbol in symbols:
        try:
            data, meta_data = ts.get_daily(symbol.lower(), outputsize='full')
            # The pandas output is indexed by date, so sort the index itself.
            data.sort_index(ascending=False, inplace=True)
            fn = f'{symbol.lower()}_{time_window}.csv'
            data.to_csv(f'data/{fn}')
            print(f"<option value='{fn}'>{fn}</option>")
        except Exception as e:
            print(f"{symbol} err: {e}")
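The snippet above relies on module-level globals that are not shown. Plausible definitions (the names come from the snippet; the values here are placeholders, not from the original):

from alpha_vantage.timeseries import TimeSeries

api_key = "YOUR_ALPHA_VANTAGE_KEY"  # placeholder: your Alpha Vantage API key
symbols = ["AAPL", "MSFT", "IBM"]   # placeholder ticker universe
time_window = "daily"               # only used in the output file name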
Example #5
def MergeAll(symbols=None):
    # Map short aliases to their CSV file names.
    aliases = {
        'cpi': 'dataCPIUSA.csv',
        'gold': 'dataGold.csv',
        'misery': 'dataMiseryUS.csv',
        'brent': 'dataOilBrent.csv',
        'wti': 'dataOilWTI.csv',
        'silver': 'dataSilver.csv',
        'spx': 'dataSPX.csv',
        'usd': 'dataUSDIndex.csv',
        'inflation': 'dataInflationUS.csv',
        'unemployment': 'dataUnemploymentUS.csv',
    }

    if symbols:
        csvList = [aliases.get(s, s) for s in symbols]
    else:
        # Default: every CSV in the data directory except the merged output.
        csvList = [x for x in os.listdir('Qian/Finance/data')
                   if x.endswith('.csv') and x != 'data.csv']

    # Inner-join all files on their shared columns.
    data = None
    for iCSV in csvList:
        iData = pd.read_csv('Qian/Finance/data/' + iCSV)
        data = iData if data is None else pd.merge(left=data, right=iData, how='inner')

    data.rename(columns={'Date':'date'},inplace=True)

    data.to_csv('Qian/Finance/data/data.csv',index=False)

    return data
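A quick usage sketch (assuming the alias CSVs exist under Qian/Finance/data/):

merged = MergeAll(['gold', 'spx'])  # inner-joins dataGold.csv with dataSPX.csv
print(merged.head())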
Example #6
def get_data_to_csv(self,
                    symbol,
                    start=datetime(2000, 1, 1),
                    end=datetime(2016, 1, 1)):
    """
    Fetch data for a symbol and store it to CSV.
    :param symbol: ticker symbol to fetch
    :param start: start of the date range
    :param end: end of the date range
    :return: the fetched DataFrame (may be empty)
    """
    data = self.get_data(symbol, start, end)
    if not data.empty:
        data.to_csv(self.symbol_to_path(symbol))
    return data
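get_data and symbol_to_path are methods of the surrounding class and are not shown. A plausible symbol_to_path (an assumption) maps a ticker to a CSV path:

import os

def symbol_to_path(self, symbol, base_dir="data"):
    # e.g. symbol_to_path("AAPL") -> "data/AAPL.csv"
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))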
Example #7
def PicSymbolCorr(symbols=['cpi','gold','misery','brent','wti','silver','spx','usd','unemployment','inflation'],window=31,dateStart='1900-01-01',dateEnd=None,monthDelta=12,corrCut=0.6):
    os.makedirs('Qian/Finance/picSymbolCorr',exist_ok=True)

    if dateEnd is None:
        dateEnd = datetime.datetime.today().date()
    else:
        dateEnd = dateEnd.replace('/', '-')
        dateEnd = datetime.datetime.strptime(dateEnd, '%Y-%m-%d').date()


    dateStart=dateStart.replace('/','-')
    dateStart=datetime.datetime.strptime(dateStart, '%Y-%m-%d').date()

    dateStart_,dateEnd_=dateStart,dateEnd



    for idxSym in range(len(symbols)):
        for jdxSym in range(idxSym):
            iSym=symbols[idxSym]
            jSym=symbols[jdxSym]

            data=MergeAll([iSym,jSym])

            os.makedirs('Qian/Finance/dataCorr/',exist_ok=True)
            data.to_csv('Qian/Finance/dataCorr/%s-%s.csv'%(iSym,jSym),index=False)


            date=data['date'].values
            iY=data[iSym].values
            jY=data[jSym].values

            date=np.array([datetime.datetime.strptime(d, '%Y-%m-%d').date() for d in date])

            iY=iY[date>=dateStart_]
            jY=jY[date>=dateStart_]
            date=date[date>=dateStart_]


            iY=iY[date<=dateEnd_]
            jY=jY[date<=dateEnd_]
            date=date[date<=dateEnd_]

            if len(date)<window+1:
                continue

            date=date[1:]
            dateStart,dateEnd= date[0],date[-1]


            iY[np.isnan(iY)]=1e-6
            jY[np.isnan(jY)]=1e-6

            iY[iY==0]=1e-6
            jY[jY==0]=1e-6

            idY=np.log(iY[1:]/iY[:-1])
            jdY=np.log(jY[1:]/jY[:-1])

            iY=iY[1:]
            jY=jY[1:]


            iData=[iY,idY]
            jData=[jY,jdY]

            flagData=['Ori','Log']

            for idxI in [0,1]:
                for idxJ in [0,1]:
                    iI=iData[idxI]
                    iJ=jData[idxJ]

                    iI[np.isnan(iI)]=1e-6
                    iJ[np.isnan(iJ)]=1e-6

                    corr=np.corrcoef(iI,iJ)

                    if np.abs(corr[0,1]) < corrCut:
                        continue

                    if np.isnan(corr[0,1]):
                        continue


                    dataType=[flagData[idxI],flagData[idxJ]]

                    figName='Corr'+dataType[0]+iSym.capitalize()+dataType[1]+jSym.capitalize()

                    title='%s(%s) - %s(%s) : %.3f'%(iSym.capitalize(),dataType[0],jSym.capitalize(),dataType[1],corr[0,1])
                    yLabel=[dataType[0]+iSym.capitalize(),dataType[1]+jSym.capitalize()]

                    if window:  # window == 0 disables smoothing
                        # Savitzky-Golay smoothing; the window must be odd and
                        # larger than the polynomial order (3).
                        iI = savgol_filter(iI, window, 3)
                        iJ = savgol_filter(iJ, window, 3)

                    fig=PlotDateYY(date,y1=iI,y2=iJ,figName=figName,title=title,figSize=(15,8),monthDelta=monthDelta,yLabel=yLabel)

                    os.makedirs('Qian/Finance/picSymbolCorr/%d-%s-%s'%(window,dateStart_,dateEnd_),exist_ok=True)
                    plt.savefig('Qian/Finance/picSymbolCorr/%d-%s-%s/%.3f-%s.png'%(window,dateStart_,dateEnd_,np.abs(corr[0,1]),figName))
                    plt.close(fig)
Example #8
# The head of this dict was truncated in the source; it is reconstructed here
# from a commented-out rename call in the original.
columnsNames = {
    '日期': 'Date', '股票代码': 'id', '名称': 'name', '收盘价': 'close',
    '最高价': 'high', '最低价': 'low', '开盘价': 'open', '前收盘': 'pre-close',
    '涨跌额': 'variation', '涨跌幅': 'return', '换手率': 'changeRatio',
    '成交量': 'tradingVol', '成交金额': 'cashVol', '总市值': 'marketValue',
    '流通市值': 'liveValue'
}
# Rename in place (a plain loop, rather than a list comprehension used only
# for its side effects).
for q in quotes:
    q.rename(columns=columnsNames, inplace=True)
testdata = ts.tsdata()
for isin, q in zip(symbols, quotes):
    testdata.insetData(isin, q)
    print(time.strftime('%H:%M:%S'), isin)

for name, data in zip(testdata.colNames, testdata.dataFrames):
    data.to_csv('data/{}.csv'.format(name))

testdata.dataFrames[0].head(10)
print(testdata.find(['close', 'open']))
# fdata = testdata.importData(quotes[0])

import correlandplot as crp

df = testdata.dataFrames[testdata.find(['return'])[0]]
ddf = testdata.dataFrames[testdata.find(['pre-close'])[0]]
d1 = datetime.datetime(2015, 1, 1)
df = df[df.index > d1]
ddf = ddf[ddf.index > d1]
df0 = df / ddf
df = df.replace('None', np.nan)
df = df.replace('NaN', np.nan)
Example #9
                  delimiter=",",
                  unpack=False)

for stock in tickers:
    print(stock)
    data = quandl.get_table(
        'WIKI/PRICES',
        ticker=stock,
        qopts={'columns': ['ticker', 'date', 'open', 'close']},
        date={
            'gte': '2018-01-01',
            'lte': '2018-04-16'
        },
        paginate=True)
    csvfilename = stock + '.csv'
    data.to_csv(csvfilename)
    data.head()

#-----------------------------------
# Extract holdings from an ETF into a file
ETFtickers = loadtxt("etflist.csv",
                     dtype=str,
                     comments="#",
                     delimiter=",",
                     unpack=False)

#keys = ["XLU","XLRE"] #list of tickers whose financial data needs to be extracted
#financial_dir = {}

#keys = ['XLU', 'XLRE']
# (fragment) The lines below come from inside a loop over tickers `i`, where
# `stock` is assumed to be a stockstats-wrapped DataFrame exposing indicator
# columns such as 'macd' and 'open_-2_r'.
        data['macd'] = stock['macd']
        #		data['sma']=stock['sma']
        #		data['ema']=stock['ema']
        data['proc'] = stock['open_-2_r']

        del data['close_-1_s']
        del data['close_-1_d']
        del data['rs_14']
        del data['rsi_14']

        #		for j in data['rsi']:
        #			print(i)

        try:
            #			new_path = '/Users/omnisciemployee/Documents/Data/Stocks_3/'
            #			data.to_csv(new_path+i+'_full.csv', index=False)
            #			temp = pd.read_csv("/Users/omnisciemployee/Documents/Data/Stocks_3/"+i+"_full.csv")
            #			data1, temp = [d.reset_index(drop=True) for d in (data1, temp)]
            #			data.join(data1)
            new_path = '/Users/omnisciemployee/Documents/Data/Stocks_last/'
            data.to_csv(new_path + i + '_full.csv', index=False)
        except Exception as e:
            print(e)
            print(i)
            continue

    except Exception as e:
        print(i)
        print(e)
        continue
import os
import bitfinex
api_v2 = bitfinex.bitfinex_v2.api_v2()
result = api_v2.candles()
import datetime
import time
from pandas_datareader import data
import matplotlib.pyplot as plt
import pandas as pd
import yfinance as yf

data = yf.download("AR", start="2010-01-03", end="2020-1-11")
data = pd.DataFrame(data)
# data = data.drop(['Adj Close'],axis=1)
print(data.tail())
data = data.to_csv('data/stocks/stocks_portfolio/AR.csv')
print('RETRIEVING DAILY STOCK DATA FOR {}'.format(str('AR')))

data = yf.download("CHK", start="2010-01-03", end="2020-1-11")
data = pd.DataFrame(data)
# data = data.drop(['Adj Close'],axis=1)
print(data.tail())
data = data.to_csv('data/stocks/stocks_portfolio/CHK.csv')
print('RETRIEVING DAILY STOCK DATA FOR {}'.format(str('CHK"')))

data = yf.download("PCG", start="2010-01-03", end="2020-1-11")
data = pd.DataFrame(data)
# data = data.drop(['Adj Close'],axis=1)
print(data.tail())
data = data.to_csv('data/stocks/stocks_portfolio/PCG.csv')
print('RETRIEVING DAILY STOCK DATA FOR {}'.format(str('PCG"')))
# (fragment) Bollinger-band study on Bank of America's close; `BAC` is a
# DataFrame loaded elsewhere in the original notebook.
BAC['Close'].loc['2019-01-01':'2020-07-31'].ta_plot(study='boll')

"""**candle plot of Apple's stock from Jan 1st 2019 to August 31st 2020.**"""

configure_plotly_browser_state()
APPL[['Open', 'High', 'Low', 'Close']].loc['2019-01-01':'2020-07-31'].iplot(kind='candle')

"""**Simple Moving Averages plot of Apple for the year 2019 and 2020.**"""

configure_plotly_browser_state()
APPL['Close'].loc['2019-01-01':'2020-07-31'].ta_plot(study='sma',periods=[14,30,60],title='Simple Moving Averages')

"""# **Build stock price prdiction model using LSTM for APPLE Inc.**"""

data=APPL.drop(['High', 'Low', 'Open', 'Volume', 'Adj Close'], axis=1)
data.to_csv('data.csv',sep=',')

data.head()

import csv
time_step = []
price = []
temp=0
with open('data.csv') as csvfile:
  reader = csv.reader(csvfile, delimiter=',')
  next(reader)
  for row in reader:
    price.append(float(row[1]))
    time_step.append(temp)
    temp+=1
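The notebook announces an LSTM price model but the fragment stops after loading the series. A minimal sketch of such a model (the window size, scaling, and architecture are assumptions, not the notebook's actual model):

import numpy as np
import tensorflow as tf

WINDOW = 60  # assumed look-back window

# Min-max scale the close prices read above into [0, 1].
series = np.array(price, dtype=np.float32)
series = (series - series.min()) / (series.max() - series.min())

# Supervised pairs: WINDOW past closes -> the next close.
X = np.array([series[i:i + WINDOW] for i in range(len(series) - WINDOW)])
y = series[WINDOW:]
X = X[..., np.newaxis]  # shape (samples, timesteps, features)

model = tf.keras.Sequential([
    tf.keras.layers.LSTM(50, input_shape=(WINDOW, 1)),
    tf.keras.layers.Dense(1),
])
model.compile(optimizer='adam', loss='mse')
model.fit(X, y, epochs=10, batch_size=32)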
Example #13
def Amzon_vendor_weight_sum():
    A_vendor_weigh = pd.read_csv('Amazon_5weeks.csv')

    # Convert the Date column to datetime
    A_vendor_weigh['Date'] = pd.to_datetime(A_vendor_weigh.loc[:, 'Date'])

    # Keep only the two columns of interest
    A_vendor_weigh = A_vendor_weigh[['Date', 'Vendor']]
    A_vendor_weigh.head()

    # Sort by date in descending order
    A_vendor_weigh = A_vendor_weigh.sort_values(['Date'], ascending=[False])

    # Set the Date column as the index
    A_vendor_weigh = A_vendor_weigh.set_index('Date')

    # Build a weekly multi-index
    A_vendor_weigh1 = A_vendor_weigh.groupby(
        ['Vendor', 'Date']).resample('w').count().unstack()
    A_vendor_weigh1.head()

    # Treat missing values as 0
    A_vendor_weigh1 = A_vendor_weigh1.fillna(0)

    # Collapse the columns by their Date level
    A_vendor_weigh2 = A_vendor_weigh1.sum(axis=1, level='Date')

    # Aggregate the rows by vendor
    A_vendor_weigh3 = A_vendor_weigh2.sum(axis=0, level='Vendor')

    # Rename the columns
    A_vendor_weigh3.columns = [
        "Week30", "Week31", "Week32", "Week33", "Week34"
    ]

    # Apply recency weights before computing the weighted sum
    A_vendor_weigh3["Week34"] = A_vendor_weigh3["Week34"] * (0.95)
    A_vendor_weigh3["Week33"] = A_vendor_weigh3["Week33"] * (0.90)
    A_vendor_weigh3["Week32"] = A_vendor_weigh3["Week32"] * (0.85)
    A_vendor_weigh3["Week31"] = A_vendor_weigh3["Week31"] * (0.80)
    A_vendor_weigh3["Week30"] = A_vendor_weigh3["Week30"] * (0.75)

    # Sort the columns in ascending order

    A_vendor_weigh3 = A_vendor_weigh3.sort_index(axis=1, ascending=True)

    ####################################################
    # Weekly weighted sum (unlike the existing Weight_sum, this uses data
    # drawn from only weeks 2-4!)

    A_vendor_weigh3['Weight_sum'] = A_vendor_weigh3['Week34'] + A_vendor_weigh3['Week33'] + A_vendor_weigh3['Week32'] + \
                                    A_vendor_weigh3['Week31'] + A_vendor_weigh3['Week30']

    # Sort by Weight_sum in descending order
    A_vendor_weigh3 = A_vendor_weigh3.sort_values(["Weight_sum"],
                                                  ascending=[False])

    # Drop the Weight_sum column
    A_vendor_weigh3_review_count = A_vendor_weigh3.drop(
        A_vendor_weigh3.columns[-1:], axis='columns')

    # Save to a CSV file
    data = pd.DataFrame(A_vendor_weigh3)
    data.to_csv('Amazon_vendor_weight_sum.csv')

    # Keep only Vendor + Weight_sum
    A_vendor_weigh4 = A_vendor_weigh3[['Weight_sum']]

    # Draw a pie chart
    plt.pie(A_vendor_weigh4,
            explode=(0.1, 0, 0, 0, 0, 0, 0),
            labels=[
                'Samsung', 'Xiaomi', 'OPPO', 'Vivo', 'Realme', 'Huawei',
                'Others'
            ],
            colors=[
                'dodgerblue', '#F44E54', 'green', '#FDDB5E', '#FF9904',
                '#76AD3B', '#BAF1A1'
            ],
            startangle=180,
            autopct='%1.2f%%')

    plt.rcParams.update({'font.size': 25})
    plt.rcParams['figure.figsize'] = [20, 15]
    plt.axis('equal')  # 'fontsize' is not a valid axis() keyword
    plt.title('Amazon Review Share', fontsize=40)
    plt.legend(fontsize=20, loc='upper right')

    plt.show()
    # Compute percentage shares (vendor_weigh_2)
    A_vendor_weigh3_per = (A_vendor_weigh3[0:20] /
                           A_vendor_weigh3[0:20].sum()) * 100

    # Save to CSV (weekly review-share percentages)
    data1 = pd.DataFrame(A_vendor_weigh3_per)
    data1.to_csv('weekly_Amazon_vendor_weight_sum_percetage#.csv')

    # Swap the rows and columns
    A_vendor_weigh3_per = np.transpose(A_vendor_weigh3_per)
    A_vendor_weigh3_per.head()

    # Build a stacked bar chart ('color', not 'colors', is the DataFrame.plot
    # keyword)
    A_vendor_weigh3_per.plot.bar(stacked=True,
                                 fontsize=25,
                                 color=[
                                     'dodgerblue', '#F44E54', 'green',
                                     '#FDDB5E', '#FF9904', '#76AD3B', '#BAF1A1'
                                 ],
                                 alpha=0.7)

    # Set the title and labels
    plt.rcParams['figure.figsize'] = [15, 13]
    plt.legend(fontsize=22, loc='lower right')
    plt.xlabel('weeks', fontsize=23)
    plt.ylabel('Percentage', fontsize=23)
    plt.title('Amazon Weekly Review Share', fontsize=40)

    # show the plot
    plt.show()

    x = A_vendor_weigh3_review_count.columns[0:]
    y = A_vendor_weigh3_review_count[0:]

    pal = [
        'dodgerblue', '#F44E54', 'green', '#FDDB5E', '#FF9904', '#76AD3B',
        '#BAF1A1'
    ]
    plt.stackplot(x,
                  y,
                  labels=[
                      'Samsung', 'Xiaomi', 'OPPO', 'Vivo', 'Realme', 'Huawei',
                      'Others'
                  ],
                  colors=pal,
                  alpha=0.7)

    plt.title("Amazon Weekly Review Count",
              fontsize=40,
              fontweight=0,
              color='black')
    plt.xlabel("Weeks", fontsize=24)
    plt.xticks(fontsize=24)

    plt.ylabel("Review counts", fontsize=24)
    plt.yticks(fontsize=24)

    plt.legend(loc='upper right', fontsize=22)
    plt.show()
# (fragment) Plotting section of a basic mean-reversion backtest, truncated at
# the top in the source; the opening plt.plot call is reconstructed from the
# matching one-line version in a later snippet.
plt.plot(data.loc[data.Position < 0].index,
         data.Position[data.Position < 0],
         color='g',
         lw=0,
         marker='_',
         label='short')
plt.axhline(y=0, lw=0.5, color='k')
for i in range(NUM_SHARES_PER_TRADE, NUM_SHARES_PER_TRADE * 25,
               NUM_SHARES_PER_TRADE * 5):
    plt.axhline(y=i, lw=0.5, color='r')
for i in range(-NUM_SHARES_PER_TRADE, -NUM_SHARES_PER_TRADE * 25,
               -NUM_SHARES_PER_TRADE * 5):
    plt.axhline(y=i, lw=0.5, color='g')
plt.legend()
plt.show()

data['Pnl'].plot(color='k', lw=1., legend=True)
plt.plot(data.loc[data.Pnl > 0].index,
         data.Pnl[data.Pnl > 0],
         color='g',
         lw=0,
         marker='.')
plt.plot(data.loc[data.Pnl < 0].index,
         data.Pnl[data.Pnl < 0],
         color='r',
         lw=0,
         marker='.')
plt.legend()
plt.show()

data.to_csv("basic_mean_reversion.csv", sep=",")
# (fragment) The same plotting section for a volatility-adjusted
# trend-following backtest; the opening plt.plot call is reconstructed as
# above.
plt.plot(data.loc[data.Position < 0].index,
         data.Position[data.Position < 0],
         color='g',
         lw=0,
         marker='_',
         label='short')
plt.axhline(y=0, lw=0.5, color='k')
for i in range(NUM_SHARES_PER_TRADE, NUM_SHARES_PER_TRADE * 25,
               NUM_SHARES_PER_TRADE * 5):
    plt.axhline(y=i, lw=0.5, color='r')
for i in range(-NUM_SHARES_PER_TRADE, -NUM_SHARES_PER_TRADE * 25,
               -NUM_SHARES_PER_TRADE * 5):
    plt.axhline(y=i, lw=0.5, color='g')
plt.legend()
plt.show()

data['Pnl'].plot(color='k', lw=1., legend=True)
plt.plot(data.loc[data.Pnl > 0].index,
         data.Pnl[data.Pnl > 0],
         color='g',
         lw=0,
         marker='.')
plt.plot(data.loc[data.Pnl < 0].index,
         data.Pnl[data.Pnl < 0],
         color='r',
         lw=0,
         marker='.')
plt.legend()
plt.show()

data.to_csv("volatility_adjusted_trend_following.csv", sep=",")
data['APO'].plot(color='k', lw=3., legend=True)
plt.plot(data.loc[ data.Trades == 1 ].index, data.APO[data.Trades == 1 ], color='r', lw=0, marker='^', markersize=7, label='buy')
plt.plot(data.loc[ data.Trades == -1 ].index, data.APO[data.Trades == -1 ], color='g', lw=0, marker='v', markersize=7, label='sell')
plt.axhline(y=0, lw=0.5, color='k')
for i in range( APO_VALUE_FOR_BUY_ENTRY, APO_VALUE_FOR_BUY_ENTRY*5, APO_VALUE_FOR_BUY_ENTRY ):
  plt.axhline(y=i, lw=0.5, color='r')
for i in range( APO_VALUE_FOR_SELL_ENTRY, APO_VALUE_FOR_SELL_ENTRY*5, APO_VALUE_FOR_SELL_ENTRY ):
  plt.axhline(y=i, lw=0.5, color='g')
plt.legend()
plt.show()

data['Position'].plot(color='k', lw=1., legend=True)
plt.plot(data.loc[ data.Position == 0 ].index, data.Position[ data.Position == 0 ], color='k', lw=0, marker='.', label='flat')
plt.plot(data.loc[ data.Position > 0 ].index, data.Position[ data.Position > 0 ], color='r', lw=0, marker='+', label='long')
plt.plot(data.loc[ data.Position < 0 ].index, data.Position[ data.Position < 0 ], color='g', lw=0, marker='_', label='short')
plt.axhline(y=0, lw=0.5, color='k')
for i in range( NUM_SHARES_PER_TRADE, NUM_SHARES_PER_TRADE*25, NUM_SHARES_PER_TRADE*5 ):
  plt.axhline(y=i, lw=0.5, color='r')
for i in range( -NUM_SHARES_PER_TRADE, -NUM_SHARES_PER_TRADE*25, -NUM_SHARES_PER_TRADE*5 ):
  plt.axhline(y=i, lw=0.5, color='g')
plt.legend()
plt.show()

data['Pnl'].plot(color='k', lw=1., legend=True)
plt.plot(data.loc[ data.Pnl > 0 ].index, data.Pnl[ data.Pnl > 0 ], color='g', lw=0, marker='.')
plt.plot(data.loc[ data.Pnl < 0 ].index, data.Pnl[ data.Pnl < 0 ], color='r', lw=0, marker='.')
plt.legend()
plt.show()

data.to_csv("volatility_adjusted_mean_reversion.csv", sep=",")
# In[3]:

data.head(3)

# In[4]:

le = data['CO AQI']
le[le.isnull()]

# In[5]:

## Look into each value
data = data.dropna()
data.to_csv("/Users/xiangliu/Desktop/CSC560 Data/pollution_AQI.csv",
            index=True,
            sep=',')

# In[6]:

data = pd.read_csv('/Users/xiangliu/Desktop/CSC560 Data/pollution_AQI.csv')
data.shape

# In[7]:

data.head(3)

# In[8]:

data.groupby(['State']).count()
# Found that 5 states are missing (Montana, Nebraska, Mississippi, West Virginia, Vermont)
Example #18
if arg == ma:
    data['<DATE>'] = pd.to_datetime(data['<DATE>'])
    data["<DATE>"] = data["<DATE>"].apply(mdates.date2num)
    # NOTE: the original uses a 20-period window despite the "MA50" name.
    data["MA50"] = data['<OPEN>'].rolling(20).mean()
    sma = data["MA50"]
    ohlc = data[['<DATE>', '<OPEN>', '<HIGH>', '<LOW>', '<CLOSE>']].copy()
    f1, ax = plt.subplots(figsize=(15, 8))
    candlestick_ohlc(ax,
                     ohlc.values,
                     width=.6,
                     colorup='green',
                     colordown='red')
    ax.plot(data['<DATE>'], sma, label='SMA')
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    plt.savefig('sma_arguments_ma.png')
    data.to_csv('sma_arguments_ma.csv', index=False)
elif arg == macd:
    data['<DATE>'] = pd.to_datetime(data['<DATE>'])
    data["<DATE>"] = data["<DATE>"].apply(mdates.date2num)
    ema12 = data['<CLOSE>'].ewm(span=12, min_periods=12,
                                adjust=False).mean()
    ema26 = data['<CLOSE>'].ewm(span=26, min_periods=26,
                                adjust=False).mean()
    data["MACD"] = ema12 - ema26
    ohlc = data[['<DATE>', '<OPEN>', '<HIGH>', '<LOW>', '<CLOSE>']].copy()
    f1, ax = plt.subplots(figsize=(15, 8))
    candlestick_ohlc(ax,
                     ohlc.values,
                     width=.6,
                     colorup='green',
                     colordown='red')
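    # The macd branch is truncated in the original; a plausible completion
    # mirroring the ma branch above (the file names are assumptions):
    ax.plot(data['<DATE>'], data['MACD'], label='MACD')
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    plt.savefig('sma_arguments_macd.png')
    data.to_csv('sma_arguments_macd.csv', index=False)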
Example #19
# This file gets the latest stock trends for a company specified on the
# command line.

import numpy as np
import pandas as pd
import datetime
from sklearn import preprocessing
from datetime import datetime
from sklearn.ensemble import RandomForestClassifier
from sklearn import neighbors
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
import operator
import re
from dateutil import parser
import json
import requests
import urllib
from pandas_datareader import data
import sys

if __name__ == '__main__':
    company = sys.argv[1]
    start_date = sys.argv[2]
    end_date = sys.argv[3]
    # Note: this rebinds `data`, shadowing the imported pandas_datareader
    # module of the same name.
    data = data.DataReader(company, 'yahoo', start_date, end_date)
    data.to_csv('Data/' + company + '.csv')
# y = 4.120264489871436e-06*x + -5250.689416195902
# plt.plot(x,y)

plt.show()

print(data.shape)
# test_x = np.linspace(0,35, n_samples)
# test_y = 5* test_x + 5 * np.random.randn(n_samples)

# plt.plot(test_x, test_y, 'o')
data['Timestamp'] = data['Date'].apply(lambda date: time.mktime(
    datetime.datetime.strptime(str(date), "%Y-%m-%d %H:%M:%S").timetuple()))
data['Scale Close'] = data['Close'].apply(
    lambda close: close / data["Close"].max())
data['Scale Close'].plot()
data.to_csv('stocks.csv')
test_x = data['Timestamp']
test_y = data["Scale Close"]

X = tf.compat.v1.placeholder(tf.float32)
Y = tf.compat.v1.placeholder(tf.float32)

W = tf.Variable(1, name="weights", dtype=tf.float32)
B = tf.Variable(0, name="bias", dtype=tf.float32)

# x = np.linspace(0,1570,100)
# plt.plot(x, W*x + B)
# plt.show()

pred = tf.math.add(tf.math.multiply(X, W), B)
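# A plausible continuation (assumed, not in the original fragment): squared
# loss plus a TF1-style training loop. The placeholders above imply eager
# execution was disabled (tf.compat.v1.disable_eager_execution()) earlier.
loss = tf.reduce_mean(tf.square(pred - Y))
train_op = tf.compat.v1.train.GradientDescentOptimizer(0.01).minimize(loss)

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    for _ in range(100):
        sess.run(train_op, feed_dict={X: test_x, Y: test_y})
    w, b = sess.run([W, B])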
Example #21
# (fragment) Truncated at the top; the opening plt.plot call is reconstructed
# from the matching snippets above.
plt.plot(data.loc[data.Position < 0].index,
         data.Position[data.Position < 0],
         color='g',
         lw=0,
         marker='_',
         label='short')
plt.axhline(y=0, lw=0.5, color='k')
for i in range(NUM_SHARES_PER_TRADE, NUM_SHARES_PER_TRADE * 25,
               NUM_SHARES_PER_TRADE * 5):
    plt.axhline(y=i, lw=0.5, color='r')
for i in range(-NUM_SHARES_PER_TRADE, -NUM_SHARES_PER_TRADE * 25,
               -NUM_SHARES_PER_TRADE * 5):
    plt.axhline(y=i, lw=0.5, color='g')
plt.legend()
plt.show()

data['Pnl'].plot(color='k', lw=1., legend=True)
plt.plot(data.loc[data.Pnl > 0].index,
         data.Pnl[data.Pnl > 0],
         color='g',
         lw=0,
         marker='.')
plt.plot(data.loc[data.Pnl < 0].index,
         data.Pnl[data.Pnl < 0],
         color='r',
         lw=0,
         marker='.')
plt.legend()
plt.show()

data.to_csv("basic_trend_following.csv", sep=",")
Example #22
def main(argv=None): # IGNORE:C0111
    '''Command line options.'''

    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    program_name = os.path.basename(sys.argv[0])
    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
    program_shortdesc = __import__('__main__').__doc__.split("\n")[1]
    program_license = '''%s

  Created by user_name on %s.
  Copyright 2016 organization_name. All rights reserved.

  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0

  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied.

USAGE
''' % (program_shortdesc, str(__date__))

    try:
        # Setup argument parser
        parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
        parser.add_argument("-r", "--recursive", dest="recurse", action="store_true", help="recurse into subfolders [default: %(default)s]")
        parser.add_argument("-v", "--verbose", dest="verbose", action="count", help="set verbosity level [default: %(default)s]")
        parser.add_argument("-i", "--include", dest="include", help="only include paths matching this regex pattern. Note: exclude is given preference over include. [default: %(default)s]", metavar="RE" )
        parser.add_argument("-e", "--exclude", dest="exclude", help="exclude paths matching this regex pattern. [default: %(default)s]", metavar="RE" )
        parser.add_argument('-V', '--version', action='version', version=program_version_message)
        #parser.add_argument(dest="paths", help="paths to folder(s) with source file(s) [default: %(default)s]", metavar="path", nargs='+')

        # Process arguments
        args = parser.parse_args()

        #paths = args.paths
        verbose = args.verbose
        recurse = args.recurse
        inpat = args.include
        expat = args.exclude

        data = pandas_datareader.data.DataReader('ORCL', data_source='yahoo')
        
        data.to_csv('orcl.csv')
        print "done"
        
        return 0
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 0
    except Exception as e:
        if DEBUG or TESTRUN:
            raise
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        return 2