示例#1
0
文件: main.py 项目: jobex/larbitrage
#----------------------------------
#     NNLS fitting
#----------------------------------
from modeling import nnls_parser

# Fit the NNLS model. ts_matrix, ts_matrix_title, settings, pred_folders and
# the helper modules used below are expected to be defined earlier in the script.
obj_nnls = nnls_parser.BASIC_NNLS(ts_matrix, ts_matrix_title)

# Append this run's fitted_r2 to the results log. The with-block guarantees
# the handle is closed even if formatting or writing raises.
with open('result_r2.dat', 'a') as fout:
	fout.write("%s\t%.6f\n" % (settings.TIME_SLOT, obj_nnls.fitted_r2))

# Persist the fitted object next to the other temporary data.
# NOTE(review): obj_nnls is presumably a plain Python object, in which case
# np.save stores it through pickle -- confirm before loading it elsewhere.
np.save('../tmp_data/' + settings.TIME_SLOT + '_obj.npy', obj_nnls)

if pred_folders is not None:
	# now have the new selected_symbols: every title except the first and
	# the last entry
	selected_symbols = ts_matrix_title[1:-1]

	# NOTE(review): this branch reads settings.Time_Slot for the prediction
	# window while the file names use settings.TIME_SLOT -- confirm that both
	# attributes exist and are meant to differ.
	pred_start = settings.Time_Slot
	pred_end = settings.E_PRED_DATE

	# use the selected_symbols to get the prediction matrix
	pr_stock_matrix = prep_matrix.parser_for_prediction(
		pred_folders, selected_symbols, start=pred_start, end=pred_end)
	pr_hs300_matrix = prep_matrix.parser_for_hs300(start=pred_start, end=pred_end)
	pr_matrix = ts_format.ts2npy(logic.merge(pr_stock_matrix, pr_hs300_matrix))

	# Save the prediction input before running the model on it.
	np.save('../tmp_data/' + settings.TIME_SLOT + '_pred.npy', pr_matrix)
	obj_nnls.predict(pr_matrix)

def test():
	'''
	Run part2() under cProfile; cProfile.run prints the profile report
	because no output file is given.
	'''
	from cProfile import run as run_profiled
	run_profiled('part2()')
示例#2
0
def parser_for_prediction(data_folders, selected_symbols,
					start=settings.S_TRAIN_DATE, end=settings.Time_Slot,
					save_to_dict=False):
	'''
	Build the merged time-series matrix for the selected symbols.

	For every symbol, each folder in data_folders is searched for the first
	file whose name contains the symbol. Each line of that file is split on
	commas: the first 15 characters of field 0 are parsed as a
	"%Y%m%d %H%M%S" timestamp, and fields 2 and 12 are averaged into the
	price that goes into the series. The per-symbol series is passed through
	rolling.rolling_anal, converted to (key, tuple) rows and merged into the
	running matrix with logic.merge.

	data_folders     -- iterable of directory paths to search
	selected_symbols -- iterable of symbols, matched as substrings of file names
	start, end       -- forwarded to rolling.rolling_anal and truncate
	save_to_dict     -- when true, the rolled array is also kept in a local
	                    dict (see the review note below)

	Returns whatever truncate(start, end, stock_matrix) returns.
	Raises ValueError if a matched file is empty or a line cannot be parsed.
	'''
	time_format = "%Y%m%d %H%M%S"

	# NOTE(review): this dict is filled only when save_to_dict is true and is
	# never returned, so the flag currently has no visible effect for callers.
	ts_data_dict = {}
	stock_matrix = []

	for c, symbol in enumerate(selected_symbols):
		# progress output; the call form prints the same text on Python 2
		print("%s %s" % (c, symbol))
		tmp_timeline, tmp_ave, tmp_spread, tmp_vol = [], [], [], []
		for folder in data_folders:
			matches = [os.path.join(folder, name)
					for name in os.listdir(folder)
					if symbol in name]
			if not matches:
				print("Warning: empty file of %s in the data folder %s" % (symbol, folder))
				continue
			filename = matches[0]

			# use txt reader
			with open(filename) as f:
				# Seed the day marker from the first line using the same
				# handle (no second, never-closed open), then rewind so the
				# loop below still sees every line including the first.
				test_t = datetime.strptime(f.readline().split(',')[0][:15], time_format)
				f.seek(0)
				for line in f:
					tmp_line = line.split(',')
					t = datetime.strptime(tmp_line[0][:15], time_format)
					b1, s1, v = float(tmp_line[2]), float(tmp_line[12]), float(tmp_line[-1])
					ave_price = (b1 + s1)/2.
					spread = (b1 - s1)/2.
					tmp_timeline.append(t)
					tmp_ave.append(ave_price)
					tmp_spread.append(spread)
					# NOTE(review): tmp_spread and tmp_vol are collected but
					# never read after this loop, and the comparison below
					# uses the day of the month only, so it does not see a
					# month boundary -- confirm intent before relying on them.
					if t.day > test_t.day:
						tmp_vol.append(cache_v)
					else:
						# the first line always lands here, so cache_v is set
						# before it is ever read for this file
						test_t = t
						cache_v = v
				tmp_vol.append(cache_v)

		# truncate the raw data to the time series data
		tmp_data = np.vstack((tmp_timeline, tmp_ave)).T
		# rolling the data
		# NOTE: the start and end used here is meaningless, BUG, Lance, 2013/10/20
		tmp_data = rolling.rolling_anal(start, end, settings.WINDOW_SIZE, tmp_data[:, 0], tmp_data[:, 1])

		# save the numpy array to the dict if request
		if save_to_dict:
			ts_data_dict[symbol] = tmp_data

		# convert to the tuple, make it faster for binding
		tmp_data = [(a[0], tuple(a[1:])) for a in tmp_data]

		# the first symbol starts the matrix; later ones are merged into it
		if c == 0:
			stock_matrix = tmp_data
		else:
			stock_matrix = logic.merge(stock_matrix, tmp_data)

	stock_matrix = truncate(start, end, stock_matrix)
	return stock_matrix