def __init__(self):
    """Set every tunable to its default value; main() may override them later."""

    # Default for max_stocks (a value of 100 was used previously).
    self.max_stocks = 50

    # cv_factor determines what portion of the stocks goes into the cross
    # validation set and what portion stays in the training set.
    # cv_factor = 2 sends every other stock to cross validation,
    # cv_factor = 3 sends every third stock.
    self.cv_factor = 2

    # future_day is how many trading days ahead we train for; 25 means we
    # measure how the stock does 25 days out.
    self.future_day = 25

    # Reference dates we train on, as returned by dateutil.days_since_1900
    # ('1980-01-01' was used previously).
    self.reference_dates = [dateutil.days_since_1900('2000-01-01')]

    # Dates used for testing, as returned by dateutil.days_since_1900
    # ('1991-01-01' was used previously).
    self.test_dates = [dateutil.days_since_1900('2010-01-01')]

    # train_days and train_increment set how many historical days feed the
    # training and the step between them. With train_days = 21 and
    # train_increment = 5 the input features are the values 5, 10, 15 and
    # 20 days before the reference day.
    self.train_days = 21
    self.train_increment = 5

    # output is a flag: when True the output function is called to write
    # the X and y matrices. The default False means nothing is written.
    self.output = False
示例#2
0
    def append(self, stocks, dates):
        ''' Append one sample per eligible stock to this learningData object.

            It is meant to be called from construct for a new object, or from
            outside the class to append to an existing one.

            stocks -- sequence of objects with parallel `dates` and `values`
                      sequences, newest entry first (index 0 is the latest day)
            dates  -- [reference date, list of history offsets, future offset];
                      the reference date is passed to dateutil.days_since_1900,
                      the offsets are counted in entries from the reference day

            For every stock with enough data, the values at the reference day
            and at each history offset, divided by the value on the reference
            day, are appended to self.X as one row; the value at the future
            offset, divided the same way, is appended to self.y; self.m is
            incremented. Stocks whose data does not cover every index needed
            are skipped.

            Calls sys.exit if len(dates[1]) + 1 differs from self.n. '''

        historyOffsets = dates[1]
        if (self.n != len(historyOffsets) + 1):
            sys.exit("trying to append to wrong size data set")

        referenceDate = dateutil.days_since_1900(dates[0])
        print(referenceDate)

        # Largest look-back requested; 0 when there are no history offsets
        maxOffset = max(historyOffsets) if len(historyOffsets) else 0
        firstDayNeeded = referenceDate - maxOffset  # How far back I need to go

        for stock in stocks:
            elements = len(stock.dates)  # Number of entries for this stock
            if elements == 0:
                continue
            # Entries are newest first, so the last one is the oldest on record
            firstDayAvailable = stock.dates[elements - 1]
            # NOTE(review): this compares calendar days while the offsets
            # below index entries; the range check further down covers the gap
            if (firstDayNeeded <= firstDayAvailable):
                continue

            # Find index of referenceDate. referenceDate might not be a trading
            # day, in which case we use the index of the first trading day
            # after referenceDate. The scan stops on the first entry strictly
            # before referenceDate, so step back by one afterwards.
            iDay = 0
            while (iDay < elements and stock.dates[iDay] >= referenceDate):
                iDay += 1
            iDay -= 1

            # Indices of the values that make up the feature row
            stockDays = [iDay] + [iDay + offset for offset in historyOffsets]
            futureDay = iDay - dates[2]

            # A negative index would silently wrap around to the oldest
            # entries and a too-large one would raise IndexError, so skip
            # stocks that do not cover every day needed
            if not all(0 <= day < elements for day in stockDays + [futureDay]):
                continue

            # All values for this stock are divided by the reference day value
            referenceValue = stock.values[iDay]
            self.X.append(
                [stock.values[day] / referenceValue for day in stockDays])
            self.y.append(stock.values[futureDay] / referenceValue)
            # Count the sample only once its row has really been stored
            self.m += 1
示例#3
0
    def populate(self):
        ''' Populate self.dates and self.values from the stock's csv file.

            The file is <self.directory>/<self.name>.csv, so no arguments are
            needed. The first row is treated as a header and skipped. For each
            remaining non-empty row, column 0 is converted with
            dateutil.days_since_1900 and column 6 is parsed as a float.

            Rows are kept in file order. According to the original author the
            csv files are in reverse chronological order, which would put the
            most recent entry at index 0. '''

        path = os.path.join(self.directory, self.name + '.csv')
        # newline='' is what the csv module recommends; the old 'U' mode flag
        # was removed in Python 3.11 and now raises ValueError
        with open(path, newline='') as f:
            reader = csv.reader(f)
            next(reader, None)  # Skip the header row (None guards an empty file)
            dates = []
            values = []
            for row in reader:
                if not row:
                    continue  # csv.reader yields [] for blank lines
                dates.append(dateutil.days_since_1900(row[0]))
                values.append(float(row[6]))
        self.dates, self.values = dates, values
示例#4
0
    def populate(self):
        ''' Populate self.dates and self.values from the stock's csv file.

            The file is <self.directory>/<self.name>.csv, so no arguments are
            needed. The first row is treated as a header and skipped. For each
            remaining non-empty row, column 0 is converted with
            dateutil.days_since_1900 and column 6 is parsed as a float.

            Rows are kept in file order. According to the original author the
            csv files are in reverse chronological order, which would put the
            most recent entry at index 0. '''

        path = os.path.join(self.directory, self.name + '.csv')
        # newline='' is what the csv module recommends; the old 'U' mode flag
        # was removed in Python 3.11 and now raises ValueError
        with open(path, newline='') as f:
            reader = csv.reader(f)
            next(reader, None)  # Skip the header row (None guards an empty file)
            dates = []
            values = []
            for row in reader:
                if not row:
                    continue  # csv.reader yields [] for blank lines
                dates.append(dateutil.days_since_1900(row[0]))
                values.append(float(row[6]))
        self.dates, self.values = dates, values
示例#5
0
 def append(self, stocks, dates):
     ''' Append one sample per eligible stock to this learningData object.

         It is meant to be called from construct for a new object, or from
         outside the class to append to an existing one.

         stocks -- sequence of objects with parallel `dates` and `values`
                   sequences, newest entry first (index 0 is the latest day)
         dates  -- [reference date, list of history offsets, future offset];
                   the reference date is passed to dateutil.days_since_1900,
                   the offsets are counted in entries from the reference day

         For every stock with enough data, the values at the reference day
         and at each history offset, divided by the value on the reference
         day, are appended to self.X as one row; the value at the future
         offset, divided the same way, is appended to self.y; self.m is
         incremented. Stocks whose data does not cover every index needed
         are skipped.

         Calls sys.exit if len(dates[1]) + 1 differs from self.n. '''

     historyOffsets = dates[1]
     if (self.n != len(historyOffsets) + 1):
         sys.exit("trying to append to wrong size data set")

     referenceDate = dateutil.days_since_1900(dates[0])
     print (referenceDate)

     # Largest look-back requested; 0 when there are no history offsets
     maxOffset = max(historyOffsets) if len(historyOffsets) else 0
     firstDayNeeded = referenceDate - maxOffset # How far back I need to go

     for stock in stocks:
         elements = len(stock.dates) # Number of entries for this stock
         if elements == 0:
             continue
         # Entries are newest first, so the last one is the oldest on record
         firstDayAvailable = stock.dates[elements-1]
         # NOTE(review): this compares calendar days while the offsets
         # below index entries; the range check further down covers the gap
         if (firstDayNeeded <= firstDayAvailable):
             continue

         # Find index of referenceDate. referenceDate might not be a trading
         # day, in which case we use the index of the first trading day
         # after referenceDate. The scan stops on the first entry strictly
         # before referenceDate, so step back by one afterwards.
         iDay = 0
         while (iDay < elements and stock.dates[iDay] >= referenceDate):
             iDay += 1
         iDay -= 1

         # Indices of the values that make up the feature row
         stockDays = [iDay] + [iDay + offset for offset in historyOffsets]
         futureDay = iDay - dates[2]

         # A negative index would silently wrap around to the oldest
         # entries and a too-large one would raise IndexError, so skip
         # stocks that do not cover every day needed
         if not all(0 <= day < elements for day in stockDays + [futureDay]):
             continue

         # All values for this stock are divided by the reference day value
         referenceValue = stock.values[iDay]
         self.X.append([stock.values[day]/referenceValue for day in stockDays])
         self.y.append(stock.values[futureDay]/referenceValue)
         # Count the sample only once its row has really been stored
         self.m += 1
示例#6
0
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 18 20:26:00 2017

@author: Valued Customer
"""

import dateutil

# Import-time smoke test: print a greeting, convert a sample date with
# dateutil.days_since_1900 and print the result. These statements run every
# time the module is loaded and leave the global name `a` bound.
# NOTE(review): `dateutil` is presumably the project's own module rather than
# the python-dateutil package -- confirm which one is on the import path.
print("hello world")
a = dateutil.days_since_1900('1981-01-01')
print(a)
def main():
    """Fit ridge models on stock history and scan alpha to look for overfitting.

    Steps, in order:
      1. Read up to 100 stocks from '../data/stocks_read.txt' and alternate
         them between a training set and a cross validation set.
      2. Build LearningData for both sets at reference dates 1980-01-01 and
         1981-01-01, using history offsets 5, 10, ..., 100 and a future
         offset of 25.
      3. For alpha = 0.02, 0.03, ... (multiplied by 1.5 while below 0.2), fit
         a Ridge model and write "alpha train_error cv_error" lines to
         'alpha.txt'.
      4. Refit with alpha = 0.05 and set up the objects the (currently
         disabled) investing back-test would use.

    Progress information is printed to stdout; nothing is returned.
    """
    max_stocks = 100
    stocks = Stock.read_stocks('../data/stocks_read.txt', max_stocks)

    # Even positions go to training, odd positions to cross validation
    stocks_train = []
    stocks_cv = []
    for position, stock in enumerate(stocks):
        if position % 2 == 0:
            stocks_train.append(stock)
        else:
            stocks_cv.append(stock)

    training_data = LearningData()
    cv_data = LearningData()

    day_history = list(range(5, 101, 5))

    reference_date = dateutil.days_since_1900('1980-01-01')
    training_data.construct(stocks_train, [reference_date, day_history, 25])
    cv_data.construct(stocks_cv, [reference_date, day_history, 25])

    reference_date = dateutil.days_since_1900('1981-01-01')
    training_data.append(stocks_train, [reference_date, day_history, 25])
    cv_data.append(stocks_cv, [reference_date, day_history, 25])

    # To look for overfitting the code loops through values of alpha, recording
    # the distance between the predicted values and the actual data for each
    print(training_data.m, cv_data.m)

    # Sanity check of np.linalg.norm on a known pair of vectors
    zeros = np.array((0, 0, 0, 0))
    ones = np.array((1, 1, 1, 1))
    print(np.linalg.norm(ones - zeros))

    alph = 0.02
    # The context manager closes alpha.txt even if a fit or predict call raises
    with open('alpha.txt', 'w') as f:
        while alph < 0.2:
            # First fit the data for this alpha
            clf = linear_model.Ridge(alpha=alph, fit_intercept=False)
            clf.fit(training_data.X, training_data.y)
            # Now measure how close the model comes for both the training data
            # and the cross validation data; predict returns one element per
            # sample
            predict_data = clf.predict(training_data.X)
            predict_cv = clf.predict(cv_data.X)
            # L2 norm of the difference, scaled by the number of samples
            diff_data = (1.0/training_data.m) * np.linalg.norm(predict_data - training_data.y)
            diff_cv = (1.0/cv_data.m) * np.linalg.norm(predict_cv - cv_data.y)
            print("lengths are ", len(predict_data), len(training_data.y), len(predict_cv), len(cv_data.y))
            # Write out the values
            f.write(str(alph) + " " + str(diff_data) + " " + str(diff_cv) + "\n")
            alph = alph * 1.5  # Increment alph

    # Do the fit based on best alpha value
    clf = linear_model.Ridge(alpha=0.05, fit_intercept=False)
    clf.fit(training_data.X, training_data.y)

    # Set-up for the investing back-test, which is currently disabled; the
    # calls are kept so the function exercises the same project code as before
    investing_data = LearningData()
    reference_date = dateutil.days_since_1900('1984-01-01')
    i_day = dateutil.find_ref_date_idx(stocks[0], reference_date)

    print("run finished")
def main(argv):
    """Run execute() with the default parameters plus one extra reference date.

    argv is accepted but not used. A fresh InitialParameters object gets the
    2001-01-01 reference date appended (1981-01-01 was used previously) and is
    then handed to execute().
    """
    params = InitialParameters()
    extra_reference = dateutil.days_since_1900('2001-01-01')
    params.reference_dates.append(extra_reference)
    execute(params)