def main(argv):
    args = parser.parse_args(argv[1:])
    beg_date = '2015-01-01'
    funds = ['002001_Nav']
    learning_rate = 0.0001
    drop_out = 0.5
    train_steps = 5000

    df_filtered = fund_Analysis(beg_date, funds)
    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date, funds, df_filtered, 'Week')
    test_features_data, features_name, test_labels = getTFDataSets(test_sets)
    # train_features_data, _, train_labels = getTFDataSets(train_sets)
    # cv_features_data, _, cv_labels = getTFDataSets(cv_sets)

    # Define Estimators
    feature_cols = [tf.feature_column.numeric_column(k) for k in features_name]
    classifier = tf.estimator.DNNClassifier(
        n_classes=3,
        feature_columns=feature_cols,
        hidden_units=[1024, 512, 128],
        optimizer=tf.train.AdamOptimizer(learning_rate),
        dropout=drop_out)

def main():
    beg_date = '2004-01-01'
    funds = ['002001_Nav']
    period = 25

    df_filtered = fund_Analysis(beg_date, funds)
    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date, funds, df_filtered, degroup='Roll', split_portion=0.15,
        period=period)
    test_features_data, features_name, test_labels = getTFDataSets(
        test_sets, period)
    train_features_data, _, train_labels = getTFDataSets(train_sets, period)
    cv_features_data, _, cv_labels = getTFDataSets(cv_sets, period)

    X = np.append(np.append(train_features_data, cv_features_data, axis=0),
                  test_features_data, axis=0)
    X_2 = np.append(train_features_data, cv_features_data, axis=0)
    y = np.append(np.append(train_labels, cv_labels, axis=0),
                  test_labels, axis=0)
    y_2 = np.append(train_labels, cv_labels, axis=0)
    print("Sample Size: {}".format(X_2.shape))
    print("Labels size: {}".format(y_2.shape))

    pca = PCA(X, ncomp=200)
    print(pca.factors.shape)
    print(pca.ic)
    print(pca.eigenvals)

def main(argv):
    args = parser.parse_args(argv[1:])
    beg_date = '2015-01-01'
    # funds = ['002001_Nav']
    funds = ['240020_Nav']
    train_steps = 2000

    df_filtered = fund_Analysis(beg_date, funds)
    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date, funds, df_filtered, 'Week')
    # print(train_sets.keys())
    # print(train_sets['sample_sets'][0])
    #'''
    test_features_data, features_name, test_labels = getTFDataSets(test_sets)
    train_features_data, _, train_labels = getTFDataSets(train_sets)
    cv_features_data, _, cv_labels = getTFDataSets(cv_sets)

    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_features_data},
        y=train_labels,
        batch_size=50,
        num_epochs=None,
        shuffle=False)
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": cv_features_data},
        y=cv_labels,
        shuffle=False)
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": test_features_data},
        shuffle=False)

    # Define Estimators
    feature_cols = [tf.feature_column.numeric_column(k) for k in features_name]
    # tensors_to_log = {'probabilities': 'Softmax_probabilities'}
    # tensors_to_log = {'accuracy': 'system_accuracy'}
    # logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
    #                                           every_n_iter=50)
    classifier = tf.estimator.Estimator(
        model_fn=lstm_model_fn,
        model_dir="/home/ghao/PycharmProjects/Project_FundsAnalysis/LSTM_MultiCells")

    # train_op = classifier.train(input_fn=train_input_fn,
    #                             max_steps=train_steps, hooks=[logging_hook])
    train_op = classifier.train(input_fn=train_input_fn, max_steps=train_steps)
    # print(train_op)

    eval_results = classifier.evaluate(input_fn=eval_input_fn,
                                       checkpoint_path=None)
    print(eval_results)

    # prediction_results = classifier.predict(input_fn=pred_input_fn,
    #                                         checkpoint_path=None)
    prediction_results = list(
        classifier.predict(input_fn=pred_input_fn, checkpoint_path=None))
    for each_result in prediction_results:
        print(each_result['probabilities'], each_result['classes'])

def main():
    beg_date = '2004-01-01'
    funds = ['002001_Nav']
    period = 25

    df_filtered = fund_Analysis(beg_date, funds)
    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date, funds, df_filtered, degroup='Roll', split_portion=0.15,
        period=period)
    test_features_data, features_name, test_labels = getTFDataSets(
        test_sets, period)
    train_features_data, _, train_labels = getTFDataSets(train_sets, period)
    cv_features_data, _, cv_labels = getTFDataSets(cv_sets, period)

    X = np.append(np.append(train_features_data, cv_features_data, axis=0),
                  test_features_data, axis=0)
    X_2 = np.append(train_features_data, cv_features_data, axis=0)
    y = np.append(np.append(train_labels, cv_labels, axis=0),
                  test_labels, axis=0)
    y_2 = np.append(train_labels, cv_labels, axis=0)
    print("Sample Size: {}".format(X_2.shape))
    print("Labels size: {}".format(y_2.shape))

    knn = KNeighborsClassifier(n_neighbors=18)
    knn_scores = cross_val_score(knn, X, y, cv=5)
    print("\n Knn_Score:")
    print(knn_scores)
    print(knn_scores.mean())
    knn.fit(X_2, y_2)
    pre = knn.predict(test_features_data)
    metrixReport(test_labels, pre)

    knn_bag = BaggingClassifier(
        base_estimator=KNeighborsClassifier(n_neighbors=10),
        max_samples=0.7,
        max_features=0.7,
        n_estimators=5)
    knn_bag_scores = cross_val_score(knn_bag, X, y, cv=5)
    print("\n Knn_bag_score")
    print(knn_bag_scores)
    print(knn_bag_scores.mean())
    knn_bag.fit(X_2, y_2)
    pre = knn_bag.predict(test_features_data)
    metrixReport(test_labels, pre)
    '''

def main(argv):
    args = parser.parse_args(argv[1:])
    beg_date = '2015-01-01'
    funds = ['002001_Nav']
    learning_rate = 0.0001
    drop_out = 0.5
    train_steps = 5000

    df_filtered = fund_Analysis(beg_date, funds)
    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date, funds, df_filtered)
    test_features_data, features_name, test_labels = getTFDataSets(test_sets)
    train_features_data, _, train_labels = getTFDataSets(train_sets)
    # cv_features_data, _, cv_labels = getTFDataSets(cv_sets)

    # Define Estimators
    feature_cols = [tf.feature_column.numeric_column(k) for k in features_name]
    # feature_cols = [tf.feature_column.numeric_column('feature', shape=[1, 395])]
    classifier = tf.estimator.DNNClassifier(
        n_classes=3,
        feature_columns=feature_cols,
        hidden_units=[1024, 512, 128],
        optimizer=tf.train.AdamOptimizer(learning_rate),
        dropout=drop_out)

    # sess = tf.Session()
    # sess.run(iterator.initializer,
    #          feed_dict={fea_holder: train_features_data, la_holder: train_labels})
    # data = train_input_fn(train_features_data, train_labels)
    train_op = classifier.train(
        input_fn=lambda: train_input_fn(train_features_data, train_labels),
        steps=train_steps)
    accuracy_op = classifier.evaluate(
        input_fn=lambda: test_input_fn(test_features_data, test_labels))
    accuracy_op = accuracy_op['accuracy']
    print("\nTest Accuracy: {0:f}%\n".format(accuracy_op * 100))

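# train_input_fn and test_input_fn are referenced by the lambdas above but are
# not defined in this snippet. Below is a minimal sketch of what they might
# look like for the DNNClassifier, assuming the features are supplied as a
# dict of {feature_name: numpy array} that matches feature_cols; the batch
# size and shuffle buffer are illustrative values, not taken from the project.
def train_input_fn(features, labels, batch_size=50):
    """Training input: shuffle, repeat, and batch the samples."""
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    return dataset.shuffle(1000).repeat().batch(batch_size)


def test_input_fn(features, labels=None, batch_size=50):
    """Evaluation/prediction input: batch without shuffling or repeating."""
    features = dict(features)
    inputs = features if labels is None else (features, labels)
    dataset = tf.data.Dataset.from_tensor_slices(inputs)
    return dataset.batch(batch_size)
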
def main(argv):
    args = parser.parse_args(argv[1:])
    beg_date = '2015-01-01'
    funds = ['002001_Nav']
    train_steps = 200

    df_filtered = fund_Analysis(beg_date, funds)
    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date, funds, df_filtered, 'Week')
    test_features_data, features_name, test_labels = getTFDataSets(test_sets)
    train_features_data, _, train_labels = getTFDataSets(train_sets)
    cv_features_data, _, cv_labels = getTFDataSets(cv_sets)

    # Define Estimators
    feature_cols = [tf.feature_column.numeric_column(k) for k in features_name]
    # classifier = tf.estimator.Estimator(
    #     model_fn=lambda dataset, mode: lstm_model_fn(dataset, mode),
    #     model_dir="/lstm_model")
    tensors_to_log = {'probabilities': 'Softmax_probabilities'}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=50)
    classifier = tf.estimator.Estimator(
        model_fn=lstm_model_fn,
        model_dir="/home/marshao/DataMiningProjects/Project_FundsAnalysis/LSTM")

    # train_op = classifier.train(
    #     input_fn=lambda: train_input_fn(train_features_data, train_labels),
    #     max_steps=train_steps, hooks=[logging_hook])
    # print(train_op)
    # eval_results = classifier.evaluate(
    #     input_fn=lambda: train_input_fn(cv_features_data, cv_labels),
    #     checkpoint_path=None)
    # print(eval_results)
    prediction_results = list(
        classifier.predict(input_fn=lambda: test_input_fn(test_features_data),
                           checkpoint_path=None))
    print(prediction_results[0]['probabilities'])
    print(prediction_results[0]['classes'])

def main():
    beg_date = '2004-01-01'
    funds = ['002001_Nav']
    period = 25

    df_filtered = fund_Analysis(beg_date, funds)
    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date, funds, df_filtered, degroup='Roll', split_portion=0.15,
        period=period)
    test_features_data, features_name, test_labels = getTFDataSets(
        test_sets, period)
    train_features_data, _, train_labels = getTFDataSets(train_sets, period)
    cv_features_data, _, cv_labels = getTFDataSets(cv_sets, period)

    X = np.append(np.append(train_features_data, cv_features_data, axis=0),
                  test_features_data, axis=0)
    X_2 = np.append(train_features_data, cv_features_data, axis=0)
    y = np.append(np.append(train_labels, cv_labels, axis=0),
                  test_labels, axis=0)
    y_2 = np.append(train_labels, cv_labels, axis=0)
    print("Sample Size: {}".format(X_2.shape))
    print("Labels size: {}".format(y_2.shape))

    pca = PCA(n_components=200)
    pca_X_2 = pca.fit_transform(X_2)
    # Project the test set with the PCA fitted on the training data
    # (re-fitting on the test set would put it in a different basis).
    pca_test = pca.transform(test_features_data)
    print("PCAed Sample Size: {}".format(pca_X_2.shape))

    knn = KNeighborsClassifier(n_neighbors=18)
    knn_scores = cross_val_score(knn, X, y, cv=5)
    print("\n Knn_Score:")
    print(knn_scores)
    print(knn_scores.mean())
    print("\n KNN no PCA")
    knn.fit(X_2, y_2)
    pre = knn.predict(test_features_data)
    metrixReport(test_labels, pre)
    print("\n KNN after PCA")
    knn.fit(pca_X_2, y_2)
    pre = knn.predict(pca_test)
    metrixReport(test_labels, pre)

    knn_bag = BaggingClassifier(
        base_estimator=KNeighborsClassifier(n_neighbors=10),
        max_samples=0.7,
        max_features=0.7,
        n_estimators=5)
    knn_bag_scores = cross_val_score(knn_bag, X, y, cv=5)
    print("\n Knn_bag_score")
    print(knn_bag_scores)
    print(knn_bag_scores.mean())
    print("\n KNN bag no PCA")
    knn_bag.fit(X_2, y_2)
    pre = knn_bag.predict(test_features_data)
    metrixReport(test_labels, pre)
    print("\n KNN bag after PCA")
    knn_bag.fit(pca_X_2, y_2)
    pre = knn_bag.predict(pca_test)
    metrixReport(test_labels, pre)

    random_forest = RandomForestClassifier(max_depth=5, max_features=0.5,
                                           n_estimators=10)
    random_forest_score = cross_val_score(random_forest, X, y, cv=5)
    print("\n Random Forest Score:")
    print(random_forest_score)
    print(random_forest_score.mean())
    print("\n RF no PCA")
    random_forest.fit(X_2, y_2)
    pre = random_forest.predict(test_features_data)
    metrixReport(test_labels, pre)
    print("\n RF after PCA")
    random_forest.fit(pca_X_2, y_2)
    pre = random_forest.predict(pca_test)
    metrixReport(test_labels, pre)
    '''

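# metrixReport is called in the scikit-learn runs above but its body is not
# shown in this snippet. A minimal sketch of what such a helper could do,
# using scikit-learn's standard classification metrics (the project's actual
# report may format things differently):
from sklearn.metrics import classification_report, confusion_matrix


def metrixReport(y_true, y_pred):
    # Per-class precision/recall/F1 summary plus the confusion matrix.
    print(classification_report(y_true, y_pred))
    print(confusion_matrix(y_true, y_pred))
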
from __future__ import print_function

from C_Fund_Analysis import fund_Analysis, fund_data_proprocessing
import numpy as np
import pandas as pd
import tensorflow as tf

beg_date = '2015-01-01'
funds = ['002001_Nav']
df_filtered = fund_Analysis(beg_date, funds)
train_sets, cv_sets, test_sets = fund_data_proprocessing(
    beg_date, funds, df_filtered)


def getFeatures(samples):
    array_z = np.zeros((1, 395), dtype=np.float32)
    for sample in samples:
        row, col = sample.shape
        columns = sample.columns
        em_rows = 5 - row
        if em_rows > 0:
            # Pad short samples with zero rows so every sample has 5 rows.
            df = pd.DataFrame(np.zeros((em_rows, col)), columns=columns)
            sample = pd.concat([sample, df])
        if em_rows < 0:
            # Drop the oldest row so the sample is trimmed back toward 5 rows.
            sample = sample.iloc[1:, :]
        if array_z[0, 0] == 0:
            array = np.array(sample.values)
            array_z = np.reshape(array, (1, -1))
        else:
            array = np.array(sample.values)
            # Assumed completion (the original snippet ends here): flatten the
            # sample and stack it onto the feature matrix, then return it.
            array = np.reshape(array, (1, -1))
            array_z = np.append(array_z, array, axis=0)
    return array_z

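# getTFDataSets is used throughout the main() variants above, but its body is
# not shown in this snippet. A plausible minimal sketch, assuming each set is
# a dict with a 'sample_sets' list of per-sample DataFrames (as suggested by
# the commented-out train_sets['sample_sets'] inspection above) and a 'labels'
# entry; the 'labels' key, label dtype, and synthetic column names are
# assumptions, not taken from the original code.
def getTFDataSets(sets):
    samples = sets['sample_sets']
    # Flatten every sample into one row of 395 values via getFeatures.
    features_data = getFeatures(samples).astype(np.float32)
    # One synthetic name per flattened feature, matching the numeric_column
    # definitions built from features_name in the estimator scripts.
    features_name = ['f{}'.format(i) for i in range(features_data.shape[1])]
    labels = np.array(sets['labels'], dtype=np.int32)
    return features_data, features_name, labels
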
def main(argv):
    args = parser.parse_args(argv[1:])
    beg_date = '2015-01-01'
    funds = ['002001_Nav']
    train_steps = 4000

    df_filtered = fund_Analysis(beg_date, funds)
    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date, funds, df_filtered, 'Week')
    test_features_data, features_name, test_labels = getTFDataSets(test_sets)
    train_features_data, _, train_labels = getTFDataSets(train_sets)
    cv_features_data, _, cv_labels = getTFDataSets(cv_sets)

    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_features_data},
        y=train_labels,
        batch_size=50,
        num_epochs=None,
        shuffle=False)
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": cv_features_data},
        y=cv_labels,
        # batch_size=50,
        # num_epochs=None,
        shuffle=False)
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": test_features_data},
        # For prediction, batch_size and num_epochs must not be set here:
        # num_epochs=None makes the input repeat forever and predict() never
        # terminates.
        # batch_size=50,
        # num_epochs=None,
        shuffle=False)

    # Define Estimators
    feature_cols = [tf.feature_column.numeric_column(k) for k in features_name]
    # classifier = tf.estimator.Estimator(
    #     model_fn=lambda dataset, mode: lstm_model_fn(dataset, mode),
    #     model_dir="/lstm_model")
    tensors_to_log = {'probabilities': 'Softmax_probabilities'}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=50)
    classifier = tf.estimator.Estimator(
        model_fn=lstm_model_fn,
        model_dir="/home/marshao/DataMiningProjects/Project_FundsAnalysis/LSTM")

    train_op = classifier.train(input_fn=train_input_fn, max_steps=train_steps,
                                hooks=[logging_hook])
    # print(train_op)
    eval_results = classifier.evaluate(input_fn=eval_input_fn,
                                       checkpoint_path=None)
    print(eval_results)

    # prediction_results = classifier.predict(input_fn=pred_input_fn,
    #                                         checkpoint_path=None)
    prediction_results = list(
        classifier.predict(input_fn=pred_input_fn, checkpoint_path=None))
    for each_result in prediction_results:
        print(each_result['probabilities'], each_result['classes'])

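# lstm_model_fn is passed to tf.estimator.Estimator above but is not defined
# in this snippet. Below is a minimal sketch of a multi-cell LSTM model_fn
# that fits the surrounding code: it names the softmax tensor
# 'Softmax_probabilities' for the LoggingTensorHook and returns the
# 'classes'/'probabilities' predictions printed above. The reshape to 5 time
# steps of 79 values follows the (1, 395) layout built in getFeatures; the
# cell sizes and learning rate are illustrative assumptions.
def lstm_model_fn(features, labels, mode):
    time_steps, n_inputs, n_classes = 5, 79, 3
    x = tf.reshape(features["x"], [-1, time_steps, n_inputs])

    # Stack two LSTM cells and unroll them over the time dimension.
    cells = [tf.nn.rnn_cell.LSTMCell(num_units=n) for n in (128, 64)]
    outputs, _ = tf.nn.dynamic_rnn(tf.nn.rnn_cell.MultiRNNCell(cells), x,
                                   dtype=tf.float32)

    # Classify from the output of the last time step.
    logits = tf.layers.dense(outputs[:, -1, :], n_classes)
    probabilities = tf.nn.softmax(logits, name='Softmax_probabilities')
    predictions = {
        'classes': tf.argmax(logits, axis=1),
        'probabilities': probabilities,
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    labels = tf.cast(labels, tf.int32)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
        train_op = optimizer.minimize(loss,
                                      global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          train_op=train_op)

    eval_metric_ops = {
        'accuracy': tf.metrics.accuracy(labels=labels,
                                        predictions=predictions['classes'])
    }
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                      eval_metric_ops=eval_metric_ops)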