def __init__(self):
    """Set every tunable parameter to its default value.

    The attributes assigned here are plain defaults; the original note says
    they are meant to be overridden from main() after construction.
    """
    # Upper bound on the number of stocks to work with.
    self.max_stocks = 50

    # Controls the train / cross-validation split: per the original note,
    # cv_factor = 2 sends every other stock to the cross-validation set and
    # cv_factor = 3 sends every third stock there.
    self.cv_factor = 2

    # How many trading days ahead the model is trained to predict; 25 means
    # the target is the stock's performance 25 days out.
    self.future_day = 25

    # Reference dates used for training, stored as whatever
    # dateutil.days_since_1900 returns for the given ISO date string.
    self.reference_dates = [dateutil.days_since_1900('2000-01-01')]

    # Dates used for testing, in the same representation.
    self.test_dates = [dateutil.days_since_1900('2010-01-01')]

    # History window and step for the input features.  Per the original
    # note, train_days = 21 with train_increment = 5 means the values at
    # 5, 10, 15 and 20 days before the reference day become features.
    # NOTE(review): the code consuming these two values is not in view.
    self.train_days = 21
    self.train_increment = 5

    # When True the output routine is called to write the X and y matrices;
    # the default False means nothing is written.
    self.output = False
def append(self, stocks, dates):
    """Append one sample row per usable stock to this learning data set.

    Meant to be called from construct() for a new object, or directly to
    extend an existing one.

    Args:
        stocks: iterable of objects exposing parallel ``dates`` and
            ``values`` sequences.  The lookup below walks forward from
            index 0 while the date is on or after the reference date, so
            the sequences must be ordered newest first.
        dates: three-item sequence ``[reference, history_offsets, future]``:
            ``reference`` is passed to ``dateutil.days_since_1900``;
            ``history_offsets`` are index offsets (towards older quotes)
            whose values become input features; ``future`` is the index
            offset (towards newer quotes) of the value to predict.

    Side effects:
        For every usable stock a row ``[1.0, v1/ref, v2/ref, ...]`` is
        appended to ``self.X``, the normalised future value is appended to
        ``self.y`` and ``self.m`` is incremented.  Exits the interpreter via
        ``sys.exit`` when ``self.n`` does not equal
        ``len(history_offsets) + 1``.

    Stocks that cannot supply every required index are skipped.  Previously
    an out-of-range index either raised IndexError or, when negative,
    silently wrapped around and read the oldest prices in the series.
    """
    history_offsets = dates[1]
    if self.n != len(history_offsets) + 1:
        sys.exit("trying to append to wrong size data set")

    referenceDate = dateutil.days_since_1900(dates[0])
    print(referenceDate)
    future_offset = dates[2]

    for stock in stocks:
        if not stock.dates:
            continue  # no quotes at all for this stock

        # Cheap calendar-day screen kept from the original: the oldest quote
        # must predate the reference date by more than the largest offset.
        firstDayAvailable = stock.dates[-1]
        firstDayNeeded = referenceDate - max(history_offsets)
        if firstDayNeeded <= firstDayAvailable:
            continue

        # Index of the first trading day on or after the reference date:
        # one step before the first quote that is older than it.  This is
        # -1 when even the newest quote is older than the reference date.
        first_older = next(
            (k for k, day in enumerate(stock.dates) if day < referenceDate),
            len(stock.dates),
        )
        iDay = first_older - 1

        stockDays = [iDay] + [iDay + offset for offset in history_offsets]
        futureDay = iDay - future_offset

        # The screen above compares calendar days with index offsets, so it
        # cannot guarantee the indices exist; check them explicitly.
        lowest = min(min(stockDays), futureDay)
        highest = max(max(stockDays), futureDay)
        if lowest < 0 or highest >= len(stock.values):
            continue

        # Every value is expressed relative to the reference-day value.
        referenceValue = stock.values[iDay]
        self.X.append([stock.values[k] / referenceValue for k in stockDays])
        self.y.append(stock.values[futureDay] / referenceValue)

        # Count the sample only once its row has really been stored.
        self.m += 1
def populate(self):
    """Load this stock's dates and values from its CSV file.

    The file is ``<self.directory>/<self.name>.csv``, so no arguments are
    needed.  The first row is treated as a header and discarded.  For every
    remaining non-empty row, ``dateutil.days_since_1900(row[0])`` is stored
    in ``self.dates`` and ``float(row[6])`` in ``self.values``.

    Rows are stored in file order.  The original note says the CSV files
    are in reverse chronological order, so index 0 would be the most recent
    quote.

    Raises:
        OSError: if the file cannot be opened.
        IndexError / ValueError: if a data row has fewer than seven columns
            or column 6 is not a number.
    """
    path = os.path.join(self.directory, self.name + '.csv')
    dates = []
    values = []
    # The old 'U' open mode was removed in Python 3.11; newline='' is the
    # mode the csv module documents for files handed to csv.reader.
    with open(path, newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # discard the header row, if there is one
        for row in reader:
            if not row:
                continue  # tolerate blank lines (e.g. a trailing newline)
            dates.append(dateutil.days_since_1900(row[0]))
            values.append(float(row[6]))
    self.dates, self.values = dates, values
def append(self, stocks, dates):
    """Append one sample row per usable stock to this learning data set.

    Meant to be called from construct() for a new object, or directly to
    extend an existing one.

    Args:
        stocks: iterable of objects exposing parallel ``dates`` and
            ``values`` sequences.  The lookup below walks forward from
            index 0 while the date is on or after the reference date, so
            the sequences must be ordered newest first.
        dates: three-item sequence ``[reference, history_offsets, future]``:
            ``reference`` is passed to ``dateutil.days_since_1900``;
            ``history_offsets`` are index offsets (towards older quotes)
            whose values become input features; ``future`` is the index
            offset (towards newer quotes) of the value to predict.

    Side effects:
        For every usable stock a row ``[1.0, v1/ref, v2/ref, ...]`` is
        appended to ``self.X``, the normalised future value is appended to
        ``self.y`` and ``self.m`` is incremented.  Exits the interpreter via
        ``sys.exit`` when ``self.n`` does not equal
        ``len(history_offsets) + 1``.

    Stocks that cannot supply every required index are skipped.  Previously
    an out-of-range index either raised IndexError or, when negative,
    silently wrapped around and read the oldest prices in the series.
    """
    history_offsets = dates[1]
    if self.n != len(history_offsets) + 1:
        sys.exit("trying to append to wrong size data set")

    referenceDate = dateutil.days_since_1900(dates[0])
    print(referenceDate)
    future_offset = dates[2]

    for stock in stocks:
        if not stock.dates:
            continue  # no quotes at all for this stock

        # Cheap calendar-day screen kept from the original: the oldest quote
        # must predate the reference date by more than the largest offset.
        firstDayAvailable = stock.dates[-1]
        firstDayNeeded = referenceDate - max(history_offsets)
        if firstDayNeeded <= firstDayAvailable:
            continue

        # Index of the first trading day on or after the reference date:
        # one step before the first quote that is older than it.  This is
        # -1 when even the newest quote is older than the reference date.
        first_older = next(
            (k for k, day in enumerate(stock.dates) if day < referenceDate),
            len(stock.dates),
        )
        iDay = first_older - 1

        stockDays = [iDay] + [iDay + offset for offset in history_offsets]
        futureDay = iDay - future_offset

        # The screen above compares calendar days with index offsets, so it
        # cannot guarantee the indices exist; check them explicitly.
        lowest = min(min(stockDays), futureDay)
        highest = max(max(stockDays), futureDay)
        if lowest < 0 or highest >= len(stock.values):
            continue

        # Every value is expressed relative to the reference-day value.
        referenceValue = stock.values[iDay]
        self.X.append([stock.values[k] / referenceValue for k in stockDays])
        self.y.append(stock.values[futureDay] / referenceValue)

        # Count the sample only once its row has really been stored.
        self.m += 1
# -*- coding: utf-8 -*-
"""
Scratch script: print a greeting, then print whatever
dateutil.days_since_1900 returns for 1 January 1981.

Created on Wed Jan 18 20:26:00 2017

@author: Valued Customer
"""

import dateutil

print("hello world")

# Convert a fixed ISO date and show the result.
a = dateutil.days_since_1900('1981-01-01')
print(a)
def main():
    """Fit a ridge regression on stock histories and sweep its alpha.

    Steps, in order:
      1. Read up to 100 stocks and alternate them between a training set
         (even positions) and a cross-validation set (odd positions).
      2. Build both LearningData sets from the 1980-01-01 reference date
         and extend them with the 1981-01-01 reference date, using history
         offsets 5, 10, ..., 100 and a future offset of 25.
      3. For alpha = 0.02, 0.03, 0.045, ... (x1.5 while below 0.2) fit a
         Ridge model and write ``alpha train_error cv_error`` lines to
         ``alpha.txt``, where each error is the L2 norm of the residual
         divided by the sample count.
      4. Refit with alpha = 0.05.

    The portfolio simulation that used to follow is disabled; only its
    set-up calls remain.
    """
    max_stocks = 100
    stocks = Stock.read_stocks('../data/stocks_read.txt', max_stocks)

    # Alternate stocks between the training and cross-validation sets.
    stocks_train = []
    stocks_cv = []
    for position, stock in enumerate(stocks):
        if position % 2 == 0:
            stocks_train.append(stock)
        else:
            stocks_cv.append(stock)

    training_data = LearningData()
    cv_data = LearningData()

    day_history = list(range(5, 101, 5))

    # NOTE(review): these dates are already converted with
    # days_since_1900 before being handed to construct()/append(); confirm
    # those methods do not convert them a second time.
    reference_date = dateutil.days_since_1900('1980-01-01')
    training_data.construct(stocks_train, [reference_date, day_history, 25])
    cv_data.construct(stocks_cv, [reference_date, day_history, 25])

    reference_date = dateutil.days_since_1900('1981-01-01')
    training_data.append(stocks_train, [reference_date, day_history, 25])
    cv_data.append(stocks_cv, [reference_date, day_history, 25])

    # (Earlier experiments with offsets [50, 100, 150] / future 50 and with
    # 1982-1983 cross-validation dates were disabled here.)

    # Baseline fit.  NOTE(review): this model is replaced by the sweep
    # below before it is ever used.
    clf = linear_model.Ridge(alpha=0.1, fit_intercept=False)
    clf.fit(training_data.X, training_data.y)

    # To look for overfitting, loop through values of alpha recording the
    # distance between predicted and actual values for both data sets.
    print(training_data.m, cv_data.m)

    # Sanity-check output kept from the original: norm of (1,1,1,1) - 0.
    aa = np.array((0, 0, 0, 0))
    bb = np.array((1, 1, 1, 1))
    print(np.linalg.norm(bb - aa))

    alph = 0.02
    # The with-block closes alpha.txt even if a fit or predict call raises.
    with open('alpha.txt', 'w') as f:
        while alph < 0.2:
            # First fit the data for this alpha.
            clf = linear_model.Ridge(alpha=alph, fit_intercept=False)
            clf.fit(training_data.X, training_data.y)

            # predict() yields one element per sample in the input.
            predict_data = clf.predict(training_data.X)
            predict_cv = clf.predict(cv_data.X)

            # L2 norm of the residual, scaled by the number of samples.
            diff_data = (1.0 / training_data.m) * np.linalg.norm(
                predict_data - training_data.y)
            diff_cv = (1.0 / cv_data.m) * np.linalg.norm(
                predict_cv - cv_data.y)
            print("lengths are ", len(predict_data), len(training_data.y),
                  len(predict_cv), len(cv_data.y))

            f.write(" ".join(str(v) for v in (alph, diff_data, diff_cv))
                    + "\n")

            alph = alph * 1.5  # geometric step to the next alpha

    # Do the fit based on the chosen alpha value.
    clf = linear_model.Ridge(alpha=0.05, fit_intercept=False)
    clf.fit(training_data.X, training_data.y)

    # Set-up for the disabled portfolio simulation.  That loop stepped
    # i_day back by 50 while above 100, rebuilt investing_data at each
    # date, picked the stock with the highest predicted growth, compounded
    # a portfolio value against the mean of all stocks, and wrote both to
    # value.txt.
    investing_data = LearningData()
    reference_date = dateutil.days_since_1900('1984-01-01')
    i_day = dateutil.find_ref_date_idx(stocks[0], reference_date)

    print("run finished")
def main(argv):
    """Run execute() with default parameters plus one extra reference date.

    Args:
        argv: accepted for the entry-point signature; not used here.
    """
    params = InitialParameters()

    # Add 2001-01-01 on top of the reference dates set by the defaults.
    reference_dates = params.reference_dates
    reference_dates.append(dateutil.days_since_1900('2001-01-01'))

    execute(params)