import os
import random
from datetime import date

import numpy as np
from scipy.signal import argrelextrema

from data_prepare import YahooHistorical
from pybrain.structure.modules import LSTMLayer, SigmoidLayer, LinearLayer, TanhLayer, SoftmaxLayer
from pybrain.tools.validation import testOnSequenceData
from pybrain.structure.modules.neuronlayer import NeuronLayer
from pybrain.tools.xml.networkwriter import NetworkWriter
from pybrain.tools.xml.networkreader import NetworkReader

random.seed(42)


def ir(p, i, data):
    # p = look-ahead window in days, i = index of the current day;
    # returns the relative deviation of the p-day average from the current adjusted close
    a = np.sum([n['adj_close'] for n in data[i:i + p]]) / p
    c = data[i]['adj_close']
    return (a - c) / c


# prepare data
symbol = '^HSI'
yahoo_data = YahooHistorical()
yahoo_data.open(os.path.join(os.path.dirname(__file__), 'data/' + symbol + '.csv'))
training_set = np.array([n for n in yahoo_data.data
                         if date(2007, 1, 1) <= n['date'] <= date(2013, 12, 31)])
test_set = np.array([n for n in yahoo_data.data
                     if date(2014, 1, 1) <= n['date'] <= date(2015, 12, 31)])
rsi = yahoo_data.relative_strength(n=14)
(sma13, sma7, macd) = yahoo_data.moving_average_convergence(7, 13)  # 7-day and 13-day moving averages and MACD

test_label = []
training_label = [n['date'] for n in training_set]
training_list = np.array([n['adj_close'] for n in training_set])
training_target = np.zeros(len(training_list))
test_list = np.array([n['adj_close'] for n in test_set])
test_target = np.zeros(len(test_list))

# mark local maxima with +1 and local minima with -1
test_target[list(argrelextrema(test_list, np.greater)[0])] = 1
test_target[list(argrelextrema(test_list, np.less)[0])] = -1
training_target[list(argrelextrema(training_list, np.greater)[0])] = 1
training_target[list(argrelextrema(training_list, np.less)[0])] = -1
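# --- Hypothetical sanity check (an assumption, not part of the original script):
# ir() measures how far the average of the next p closes sits above or below
# today's close, and argrelextrema marks the turning points used as targets.
_demo_prices = np.array([10.0, 11.0, 13.0, 12.0, 9.0, 10.5, 12.5])
_demo_data = [{'adj_close': v} for v in _demo_prices]
print(ir(3, 0, _demo_data))          # (mean(10, 11, 13) - 10) / 10 = 0.133...
_demo_target = np.zeros(len(_demo_prices))
_demo_target[argrelextrema(_demo_prices, np.greater)[0]] = 1   # peak at index 2
_demo_target[argrelextrema(_demo_prices, np.less)[0]] = -1     # trough at index 4
print(_demo_target)                  # [ 0.  0.  1.  0. -1.  0.  0.]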
import os

import numpy as np
import talib
from scipy.signal import argrelextrema

from data_prepare import YahooHistorical


# Decimal Scaling
def normalize(data, mean=None, std=None):
    # replace leading NaNs with the first non-NaN value
    nan = np.isnan(data)
    nnan = np.where(~nan)[0][0]
    data[:nnan] = data[nnan]
    if mean is None:
        mean = np.mean(data)
    if std is None:
        std = np.std(data)
    return (data - mean) / std, mean, std


if __name__ == "__main__":
    # prepare data
    symbol = "0005.HK"
    stock_data = YahooHistorical()
    stock_data.open(os.path.join(os.path.dirname(__file__), "data/" + symbol + ".csv"))
    dataset = stock_data.get()
    date = np.array([n['date'] for n in dataset])
    close_prices = np.array([n['adj_close'] for n in dataset])
    low_prices = np.array([n['low'] for n in dataset])
    high_prices = np.array([n['high'] for n in dataset])
    volumes = np.array([n['vol'] for n in dataset])

    prices, mean, std = normalize(close_prices)
    low_prices, mean, std = normalize(low_prices, mean, std)
    high_prices, mean, std = normalize(high_prices, mean, std)

    emax = list(argrelextrema(prices, np.greater)[0])
    emin = list(argrelextrema(prices, np.less)[0])

    sma13, mean, std = normalize(talib.SMA(close_prices, 13), mean, std)    # 13-day SMA
    sma50, mean, std = normalize(talib.SMA(close_prices, 50), mean, std)    # 50-day SMA
    sma100, mean, std = normalize(talib.SMA(close_prices, 100), mean, std)  # 100-day SMA
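    # --- Hypothetical check (an assumption, not part of the original script):
    # because the low/high/SMA series reuse the mean and std computed from the
    # close prices, they stay on one scale and can be mapped back to price
    # space with the same two numbers.
    denorm_prices = prices * std + mean
    assert np.allclose(denorm_prices, close_prices)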
import os
from datetime import date
from sys import stdout

import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import argrelextrema

from data_prepare import YahooHistorical


def normalize(data, mean=None, std=None):
    if mean is None:
        mean = np.mean(data)
    if std is None:
        std = np.std(data)
    x = (data - mean) / std
    return x, mean, std


# prepare data
symbol = '^HSI'
yahoo_data = YahooHistorical(data_from=date(2014, 1, 1), data_to=date(2015, 12, 31))
yahoo_data.open(os.path.join(os.path.dirname(__file__), 'data/' + symbol + '.csv'))
training_set = yahoo_data.get()
test_set = training_set[200:]

rsi = yahoo_data.relative_strength(n=14)
(sma13, sma7, macd) = yahoo_data.moving_average_convergence(7, 13)  # 7-day and 13-day moving averages and MACD
label = np.array([n['date'] for n in test_set])

# normalize the price series; the SMA and trading-range series reuse the price
# mean/std, while MACD and RSI are scaled by their own statistics
(training_prices, m, s) = normalize(np.array([n['adj_close'] for n in training_set]))
(prices, m, s) = normalize(np.array([n['adj_close'] for n in test_set]))
(tmax, tmin) = yahoo_data.trading_range_breakout(50)
(sma13, m, s) = normalize(sma13, m, s)
(sma7, m, s) = normalize(sma7, m, s)
(tmax, m, s) = normalize(tmax, m, s)
(tmin, m, s) = normalize(tmin, m, s)
(macd, m, s) = normalize(macd)
(rsi, m, s) = normalize(rsi)
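# --- Hypothetical quick look (not in the original script): overlay the
# normalized test-set close with the tails of the normalized SMAs to confirm
# everything sits on a comparable scale after normalize().
plt.plot(prices, label='adj_close (normalized)')
plt.plot(sma7[-len(test_set):], label='SMA(7) (normalized)')
plt.plot(sma13[-len(test_set):], label='SMA(13) (normalized)')
plt.legend()
plt.show()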
import os
from datetime import date, datetime
from itertools import cycle
from sys import stdout

import numpy as np
import matplotlib.pyplot as plt

from data_prepare import YahooHistorical
from pybrain.supervised import RPropMinusTrainer, BackpropTrainer
from pybrain.datasets import SequentialDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import LSTMLayer, SigmoidLayer, LinearLayer, TanhLayer
from pybrain.tools.validation import testOnSequenceData

# prepare data
yahoo_data = YahooHistorical()
yahoo_data.open(os.path.join(os.path.dirname(__file__), 'data/^HSI.csv'))
dataset = yahoo_data.get()

# build network: 5 inputs, 25 LSTM cells, 2 sigmoid outputs
net = buildNetwork(5, 25, 2, hiddenclass=LSTMLayer, outclass=SigmoidLayer,
                   outputbias=False, recurrent=True)
net.randomize()

# build sequential dataset: inputs are open/high/low/adjusted close plus the
# 20-day SMA; the target encodes the next day's return, with the up-move
# magnitude in slot 0 (training_set and sma20 are assumed to be derived from
# `dataset` earlier)
train_ds = SequentialDataSet(5, 2)
for n, n1, m20 in zip(training_set, training_set[1:], sma20[-len(training_set):]):
    i = [n['open'], n['high'], n['low'], n['adj_close'], m20]
    d = (n1['adj_close'] - n['adj_close']) / n['adj_close']
    o = [-1, -1]
    if d > 0:
        o[0] = abs(d)
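    # --- Hedged sketch: the rest of this loop and the training pass below are
    # assumptions, not the original code. A symmetric target would carry the
    # down-move magnitude in slot 1; each (input, target) pair is then added
    # to the sequential dataset and RPropMinusTrainer fits the recurrent net.
    else:
        o[1] = abs(d)
    train_ds.addSample(i, o)

trainer = RPropMinusTrainer(net, dataset=train_ds)
for epoch in range(100):               # assumed number of epochs
    err = trainer.train()              # one training pass over the sequence
    stdout.write('epoch %d error %.6f\n' % (epoch, err))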
__version__ = '0.1'

import os
from datetime import date, datetime
from sys import stdout
import pickle

import numpy as np
import matplotlib.pyplot as plt

from data_prepare import YahooHistorical
from lib.features import Features
from pybrain.structure.modules import KohonenMap

np.random.seed(42)

symbol1 = '0005.HK'
yahoo_data1 = YahooHistorical(data_from=date(2000, 1, 1), data_to=date(2015, 12, 31))
yahoo_data1.open(os.path.join(os.path.dirname(__file__), 'data/' + symbol1 + '.csv'))
data1 = yahoo_data1.get()
dataset1 = np.asarray([n['adj_close'] for n in data1])

# p = 17  # 17-day pattern window (alternative)
p = 5     # 5-day pattern window
nodes = 3
som = KohonenMap(p, nodes)
# som = pickle.load(open("pattern5.p", "rb"))
som.learningrate = 0.01
epochs = 1000
training_dataset = []
result = {}

# preparation
for i in xrange(p, len(dataset1)):
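    # --- Hedged sketch: this loop body and the training pass below are
    # assumptions, not the original code. Each p-day window is expressed
    # relative to its first close, then used to train the SOM; activate()
    # returns the coordinates of the winning node and backward() updates it.
    window = dataset1[i - p:i]
    training_dataset.append(window / window[0] - 1.0)

for epoch in xrange(epochs):
    sample = training_dataset[np.random.randint(len(training_dataset))]
    som.activate(sample)
    som.backward()

# group windows by their winning node to collect similar p-day patterns
for idx, sample in enumerate(training_dataset):
    winner = tuple(som.activate(sample))
    result.setdefault(winner, []).append(idx)

# pickle.dump(som, open("pattern5.p", "wb"))  # optionally persist the trained map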
import os
from datetime import date
import pickle

import numpy as np

from data_prepare import YahooHistorical
# import talib  # needed if the indicator features below are enabled

np.random.seed(42)


def normalize(data, mean=None, std=None):
    data = np.asarray(data)
    if mean is None:
        mean = np.mean(data)
    if std is None:
        std = np.std(data)
    return (data - mean) / std, mean, std


symbol1 = "0005.HK"
yahoo_data = YahooHistorical(data_from=date(2000, 1, 1), data_to=date(2015, 12, 31))
yahoo_data.open(os.path.join(os.path.dirname(__file__), "data/" + symbol1 + ".csv"))
data = yahoo_data.get()
close_prices = np.array([n["adj_close"] for n in data])

# additional talib-based indicator features (currently commented out):
# low_prices = np.array([n['low'] for n in data])
# high_prices = np.array([n['high'] for n in data])
# volumes = np.array([n['vol'] for n in data])
# prices, mean, std = normalize(close_prices)
# sma10, mean, std = normalize(talib.SMA(close_prices, 10), mean, std)    # 10-day SMA
# sma50, mean, std = normalize(talib.SMA(close_prices, 50), mean, std)    # 50-day SMA
# sma200, mean, std = normalize(talib.SMA(close_prices, 200), mean, std)  # 200-day SMA
# macd_upper, macd_middle, macd_lower = talib.MACD(close_prices, 12, 26, 9)
# mfi = talib.MFI(high_prices, low_prices, close_prices, volumes)  # Money Flow Index
# rsi = talib.RSI(close_prices)

training_input = []
training_output = []
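# --- Hypothetical sketch (an assumption, not the original feature construction):
# one simple way to fill training_input/training_output is a sliding window of
# normalized closes as the input and the next day's direction as the output.
window = 10                                        # assumed look-back length
norm_prices, mean, std = normalize(close_prices)
for i in range(window, len(norm_prices) - 1):
    training_input.append(norm_prices[i - window:i])
    training_output.append(1 if close_prices[i + 1] > close_prices[i] else -1)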