Exemplo n.º 1
0
def arrayFromQuotesForList(symbolsFile, beginDate, endDate):
    '''
    Read quotes for the symbols listed in a file and return a cleaned array.

    Parameters:
    - symbolsFile: path handed to readSymbolList to obtain the symbols
    - beginDate, endDate: passed to downloadQuotes as date1 / date2

    Returns a tuple (x, symbolList, datearray):
    - x: float ndarray, the transpose of the downloaded quote table, i.e.
      one row per column of the table and one column per index entry
      (the original notes describe this as [num stocks : num days])
    - symbolList: list of the quote table's column labels
    - datearray: ndarray of ISO-format date strings taken from the
      quote table's index

    Each row of x is passed through interpolate and then cleantobeginning.
    Per the original notes these infill interior missing values by linear
    interpolation and repeat the first valid quote back to the start of
    the series.
    '''

    import pandas as pd

    from functions.TAfunctions import interpolate
    from functions.TAfunctions import cleantobeginning

    # read symbols list
    symbols = readSymbolList(symbolsFile, verbose=True)

    # get quotes for each symbol in list (adjust=True requests adjusted close)
    quote = downloadQuotes(symbols,
                           date1=beginDate,
                           date2=endDate,
                           adjust=True,
                           Verbose=True)

    # DataFrame.convert_objects(convert_numeric=True) no longer exists in
    # current pandas; pd.to_numeric with errors='coerce' is the documented
    # replacement and likewise turns unparseable entries into NaN.
    quote = quote.apply(pd.to_numeric, errors='coerce')

    # Cast once to float so that interpolated values are never truncated
    # into an integer array or stored in an object array.
    x = quote.values.T.astype(float)

    datearray = np.array([d.date().isoformat() for d in quote.index])
    symbolList = list(quote.columns.values)

    # Clean up input quotes, one row (symbol) at a time
    #  - interpolate: infill interior NaN values
    #  - cleantobeginning: copy first valid quote to all earlier positions
    for ii in range(x.shape[0]):
        x[ii, :] = interpolate(x[ii, :])
        x[ii, :] = cleantobeginning(x[ii, :])

    return x, symbolList, datearray
Exemplo n.º 2
0
    def _return_quotes_array(symbolsFile,
                             start_date="2018-01-01",
                             end_date=None):
        """Fetch quotes for the symbols listed in a file.

        Parameters:
        - symbolsFile: path handed to readSymbolList
        - start_date: first date requested from get_quotes_yf
        - end_date: last date requested; today's date (as a string) is
          used when this is None

        Returns a tuple (x, symbolList, newdates):
        - x: the `.values` of the object returned by get_quotes_yf
        - symbolList: its column labels, or the symbols read from the
          file when the result has no `columns` attribute
        - newdates: ndarray of strings, each the text before the first
          space in str() of an index entry
        """
        import datetime
        from functions.readSymbols import readSymbolList
        # Kept although unused by name: yf.pdr_override() below patches
        # pandas_datareader, so the import order of the original is preserved.
        from pandas_datareader import data as pdr
        import functions.fix_yahoo_finance as yf
        yf.pdr_override()

        # read symbols list
        symbols = readSymbolList(symbolsFile, verbose=True)

        if end_date is None:
            end_date = str(datetime.date.today())

        data = get_quotes_yf(symbols, start_date=start_date, end_date=end_date)

        try:
            # result with a `columns` attribute (multiple symbols)
            symbolList = list(data.columns)
        except AttributeError:
            # result without columns (single symbol): fall back to the
            # symbols read from the file
            symbolList = symbols

        x = data.values
        # keep only the part of each index label before the first space
        newdates = np.array([str(stamp).split(' ')[0] for stamp in data.index])

        return x, symbolList, newdates
Exemplo n.º 3
0
def arrayFromQuotesForListWithVol(symbolsFile, beginDate, endDate):
    '''
    Read quotes for the symbols listed in a file and return a cleaned array
    plus a date array.

    Parameters:
    - symbolsFile: path handed to readSymbolList to obtain the symbols
    - beginDate, endDate: passed to downloadQuotes as date1 / date2

    Returns a tuple (x, symbolList, datearray):
    - x: the quote data after swapping its first two axes; the loop below
      indexes it with three indices and cleans only index 0 of the second
      axis
    - symbolList: NOTE(review) - not assigned anywhere in this function;
      see the note at the return statement
    - datearray: ndarray built from quote.getlabel(2)

    Original notes on the intended processing:
    - prices in array with dimensions [num stocks : num days ]
    - process stock quotes to show closing prices adjusted for splits, dividends
    - single ndarray with dates common to all stocks [num days]
    - clean up stocks by:
       - infilling empty values with linear interpolated value
       - repeat first quote to beginning of series

    NOTE(review): interpolate and cleantobeginning are not imported inside
    this function, so they must be available at module level - confirm.
    '''

    # read symbols list
    symbols = readSymbolList(symbolsFile, verbose=True)

    # get quotes for each symbol in list (adjusted close)
    quote = downloadQuotes(symbols,
                           date1=beginDate,
                           date2=endDate,
                           adjust=True,
                           Verbose=True)

    # clean up quotes for missing values and varying starting date
    # NOTE(review): the result of copyx() is overwritten by the very next
    # assignment, so this call only matters if it has side effects - confirm.
    x = quote.copyx()
    # swap the first two axes of the quote data
    x = quote.as_matrix().swapaxes(0, 1)
    # NOTE(review): copyx/getlabel and as_matrix look like they belong to
    # different container APIs - confirm what type downloadQuotes returns.
    date = quote.getlabel(2)
    datearray = np.array(date)

    # Clean up input quotes
    #  - infill interior NaN values using nearest good values to linearly interpolate
    #  - copy first valid quote to from valid date to all earlier positions
    # Only index 0 of the second axis is cleaned; other entries along that
    # axis are left as downloaded.
    for ii in range(x.shape[0]):
        # leftover debug trace, printed once per row
        print(" line 315.........")
        # NOTE(review): `.values` is read from a slice of x; this presumes the
        # slice is an object exposing `.values` rather than a plain ndarray -
        # confirm.
        x[ii, 0, :] = interpolate(x[ii, 0, :].values)
        x[ii, 0, :] = cleantobeginning(x[ii, 0, :].values)

    # NOTE(review): symbolList is never assigned in this function; unless a
    # module-level name of that spelling exists this raises NameError - verify.
    return x, symbolList, datearray
Exemplo n.º 4
0
def UpdateHDF_yf(symbol_directory, symbols_file):
    """Update the HDF5 quote archive for a symbols file with recent quotes.

    Steps, in order:
    1. Load the archived quotes with loadQuotes_fromHDF and look up the
       last archived date with getLastDateFromHDF5.
    2. Find symbols present in the symbols file but missing from the
       archive; if there are any, write them to
       "newsymbols_tempfile.txt" in symbol_directory and download their
       quotes starting 1991-01-01.
    3. Download quotes for every symbol in the file from the last archived
       date through tomorrow and overlay them on the archive with
       DataFrame.combine_first (downloaded values take precedence).
    4. Run every column through cleanspikes, cleantobeginning, cleantoend,
       interpolate and cleantobeginning.
    5. Add the columns for the new symbols and a synthetic 'CASH' column.
    6. Write the result to "<cwd>/symbols/<listname>_.hdf5" under the key
       `listname`.

    Parameters:
    - symbol_directory: directory containing the symbols file
    - symbols_file: file name of the symbols list inside that directory

    Returns None.
    """
    import datetime
    from time import sleep

    from functions.TAfunctions import cleanspikes
    from functions.TAfunctions import interpolate
    from functions.TAfunctions import cleantobeginning
    # cleantoend was called without being imported alongside its siblings;
    # assumed to live in the same module as the other cleaning helpers.
    from functions.TAfunctions import cleantoend

    print("  ... inside UpdateHDF_yf ...")

    filename = os.path.join(symbol_directory, symbols_file)

    # only the archived quote table and the list name are used below
    _, _, _, quote, listname = loadQuotes_fromHDF(filename)
    print("  ... inside UpdateHDF_yf ... finished loadQuotes_fromHDF")

    def _return_quotes_array(symbolsFile,
                             start_date="2018-01-01",
                             end_date=None):
        """Fetch quotes for the symbols in `symbolsFile`.

        Returns (values, symbol list, ndarray of date strings). The symbol
        list falls back to the symbols read from the file when the result
        of get_quotes_yf has no `columns` attribute.
        """
        from functions.readSymbols import readSymbolList
        # Kept although unused by name: yf.pdr_override() below patches
        # pandas_datareader, so the import order of the original is preserved.
        from pandas_datareader import data as pdr
        import functions.fix_yahoo_finance as yf
        yf.pdr_override()

        # read symbols list
        symbols = readSymbolList(symbolsFile, verbose=True)

        if end_date is None:
            end_date = str(datetime.date.today())

        data = get_quotes_yf(symbols, start_date=start_date, end_date=end_date)

        try:
            # result with a `columns` attribute (multiple symbols)
            symbolList = list(data.columns)
        except AttributeError:
            # result without columns (single symbol)
            symbolList = symbols

        # keep only the part of each index label before the first space
        dates = np.array([str(stamp).split(' ')[0] for stamp in data.index])

        return data.values, symbolList, dates

    # get last date in hdf5 archive
    lastdate = getLastDateFromHDF5(symbol_directory, symbols_file)
    print(" ... inside UpdateHDF5 ... lastdate = ", lastdate)
    sleep(3)

    ##
    ## Quotes for symbols that were added to the list since the last update
    ##

    # locate symbols added to list that aren't in HDF5 file
    symbols_in_list = readSymbolList(filename, verbose=False)
    symbols_in_HDF5 = set(quote.columns.values)
    new_symbols = [s for s in symbols_in_list if s not in symbols_in_HDF5]

    if new_symbols:
        # write new symbols to temporary file; the context manager closes
        # the file even if a write fails
        tempfilename = os.path.join(symbol_directory,
                                    "newsymbols_tempfile.txt")
        with open(tempfilename, "w") as outfile:
            for isymbol in new_symbols:
                print("new symbol = ", isymbol)
                outfile.write(str(isymbol) + "\n")

        newquotesfirstdate = datetime.date(1991, 1, 1)
        newquoteslastdate = datetime.date.today()

        # print dates to be used
        print("dates for new symbol found = ", newquotesfirstdate,
              newquoteslastdate)
        print("newquotesfirstdate, newquoteslastdate = ", newquotesfirstdate,
              newquoteslastdate)

        newadjClose, newsymbols, newdatearray = _return_quotes_array(
            tempfilename,
            start_date=newquotesfirstdate,
            end_date=newquoteslastdate)

        newdatearray = np.asarray(newdatearray)
        print(" newadjClose.shape = ", newadjClose.shape)
        print(" len(newsymbols) = ", len(newsymbols))
        print(" len(newdatearray) = ", len(newdatearray))
        print(" security values check: ",
              newadjClose[np.isnan(newadjClose)].shape)

        newdates = [str(d) for d in newdatearray]
        print("newadjClose.shape = ", newadjClose.shape)
        print('newsymbols = ', newsymbols)
        print('newdatearray = ', newdatearray)

        # Transpose only when the second axis is the date axis. The ndim
        # guard keeps 1-D data (single symbol) from raising IndexError on
        # shape[1].
        if newadjClose.ndim > 1 and newadjClose.shape[1] == len(newdates):
            quotes_NewSymbols = pd.DataFrame(newadjClose.swapaxes(0, 1),
                                             index=newdates,
                                             columns=newsymbols)
        else:
            quotes_NewSymbols = pd.DataFrame(newadjClose,
                                             index=newdates,
                                             columns=newsymbols)

    ##
    ## Quotes for all symbols in the list since the last archived date
    ##

    if isinstance(lastdate, str):
        # "YYYY-MM-DD" style string -> datetime.date
        newquotesfirstdate = datetime.date(
            *[int(val) for val in lastdate.split('-')])
    else:
        newquotesfirstdate = lastdate
    # Request through tomorrow. datetime.timedelta is referenced through the
    # locally imported module so no separate `timedelta` import is needed.
    newquoteslastdate = datetime.datetime.now() + datetime.timedelta(days=1)

    newadjClose, symbols, newdatearray = _return_quotes_array(
        filename, start_date=newquotesfirstdate, end_date=newquoteslastdate)

    print(" ...inside UpdateSymbols_inHDF5... newadjClose.shape =  ",
          newadjClose.shape)
    print(" ...inside UpdateSymbols_inHDF5... len(symbols) =  ", len(symbols))
    print(" ...inside UpdateSymbols_inHDF5...    quote.shape =  ", quote.shape)

    newdates = [str(d) for d in newdatearray]
    quoteupdate = pd.DataFrame(newadjClose, index=newdates, columns=symbols)

    # downloaded values win; archived values fill whatever is missing
    updatedquotes = quoteupdate.combine_first(quote)

    ##
    ## Clean every column of the merged table
    ##
    symbolListupdate = list(updatedquotes.columns.values)
    for ii, isymbolupdate in enumerate(symbolListupdate):
        xupdate = updatedquotes[isymbolupdate].values
        # the figure printed is the shape of the non-NaN subset
        print("  ... progress:  ii, symbol, # nans = ", ii, isymbolupdate,
              xupdate[~np.isnan(xupdate)].shape)
        xupdate = cleanspikes(xupdate)
        xupdate = cleantobeginning(xupdate)
        xupdate = cleantoend(xupdate)
        xupdate = interpolate(xupdate, verbose=True)
        xupdate = cleantobeginning(xupdate)
        updatedquotes[isymbolupdate] = xupdate

    if new_symbols:
        # DataFrame.info() prints its report itself and returns None, so
        # each of these lines ends with "None" after the report.
        print("\n\n\n...quotes_NewSymbols = ", quotes_NewSymbols.info())
        print("\n\n\n...updatedquotes = ", updatedquotes.info())
        for isymbol in new_symbols:
            updatedquotes[isymbol] = quotes_NewSymbols[isymbol]
        print("\n\n\n...merged updatedquotes = ", updatedquotes.info())

    # Synthetic CASH series: starts from 100000 and gains .01 at every 10th
    # position. At i == 0 the index i - 1 is -1, which reads the last
    # element (still 100000. at that point), so the first value is 100000.01.
    # Kept as a running loop so the stored values match earlier archives.
    CASHadjClose = np.ones((len(updatedquotes.index)), float) * 100000.
    for i in range(CASHadjClose.shape[0]):
        if i % 10 == 0:
            CASHadjClose[i] = CASHadjClose[i - 1] + .01
        else:
            CASHadjClose[i] = CASHadjClose[i - 1]

    updatedquotes['CASH'] = CASHadjClose / 100000.

    # set up to write quotes to disk.
    # NOTE(review): the output goes under the current working directory,
    # not under symbol_directory - confirm this is intended.
    dirname = os.path.join(os.getcwd(), "symbols")

    hdf5filename = os.path.join(dirname, listname + "_.hdf5")
    print("hdf5 filename = ", hdf5filename)
    updatedquotes.to_hdf(hdf5filename,
                         key=listname,
                         mode='a',
                         format='table',
                         append=False,
                         complevel=5,
                         complib='blosc')