def main():
    """Run linear-regression studies on INSTRUMENT: technical indicators, then macro/market indicators."""
    # Work relative to this script's own directory so relative data paths resolve
    # the same way regardless of where the script is launched from.
    # (Removed dead commented-out `os.chdir(PROJ_DIR)` — superseded by the line below.)
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Technical-indicator study: EDA with technical-analysis features enabled.
    df = EDA(INSTRUMENT, debug=False, tech_analysis=True)
    print('Technical indicators')
    # NOTE(review): callee name is misspelled ("techical") at its definition; kept as-is.
    techical_indicators_linreg(df)

    # Macro/market-indicator study on the default EDA output.
    df = EDA(INSTRUMENT)
    print('Market indicators')
    macro_indicators_linreg(df)
def main():
    """Run indicator studies on INSTRUMENT: technical indicators, then index (market) indicators."""
    # Work relative to this script's own directory so relative data paths resolve
    # the same way regardless of where the script is launched from.
    # (Removed dead commented-out `os.chdir(PROJ_DIR)` — superseded by the line below.)
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # EDA(INSTRUMENT, False, True): positional flags — presumably debug=False,
    # tech_analysis=True, matching the sibling script that passes keywords. TODO confirm.
    df = EDA(INSTRUMENT, False, True)
    print('Technical indicators')
    techical_indicators(df)

    df = EDA(INSTRUMENT)
    # BUG FIX: this header previously repeated 'Technical indicators' even though
    # index/market indicators are analyzed here (copy-paste error).
    print('Market indicators')
    index_indicators(df)
def main():
    """Run indicator studies on INSTRUMENT with the 'Close_open' column removed first."""
    # Work relative to this script's own directory so relative data paths resolve consistently.
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Technical-indicator study (positional flags: presumably debug=False,
    # tech_analysis=True — TODO confirm against EDA's signature).
    df = EDA(INSTRUMENT, False, True)
    df = df.drop('Close_open', axis=1)  # exclude the Close_open column from this study
    print(df.columns)
    # BUG FIX: corrected typo "Techincal" -> "Technical" in the printed header.
    print("Technical indicators")
    techical_indicators(df)
    print()

    # Market-indicator study on the default EDA output.
    df = EDA(INSTRUMENT)
    df = df.drop('Close_open', axis=1)
    print(df.columns)
    print("Market indicators")
    index_indicators(df)
import tensorflow as tf import pandas as pd import numpy as np import seaborn as sns import matplotlib.pyplot as plt from Recommender import Recommender from EDA import EDA train_ds, eval_ds, test_ds, n_users, n_movies = EDA() #NCF Hyperparams r_emb_dim = 32 r_lr = 0.0005 r_epochs = 10 r_l2 = 0.0000 r_dropout = 0.0 r_batch_size = 40960 r_dense_struct = [16, 4] #Model instantiation recommender = Recommender(n_users, n_movies, r_emb_dim, r_dense_struct, r_dropout, r_l2) recommender.compile(tf.keras.optimizers.Adam(r_lr), tf.keras.losses.MeanSquaredError(), metrics=[tf.keras.metrics.RootMeanSquaredError()]) recommender.train_on_batch(train_ds.batch(r_batch_size)) print(recommender.summary()) #Model fit recommender.fit(train_ds.batch(r_batch_size),
import tensorflow as tf
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from MatrixFactorizer import MatrixFactorizer
from EDA import EDA

# Datasets and entity counts come straight from the project's EDA pipeline.
train_ds, eval_ds, test_ds, n_users, n_movies = EDA()

# Matrix Factorizer Hyperparams
f_emb_dim = 16
f_lr = 0.0015
f_epochs = 10
f_batch_size = 40960

# Model instantiation
factorizer = MatrixFactorizer(n_users, n_movies, f_emb_dim)

# Compile with MSE loss; RMSE is tracked as an additional metric.
optimizer = tf.keras.optimizers.Adam(f_lr)
loss_fn = tf.keras.losses.MeanSquaredError()
rmse = tf.keras.metrics.RootMeanSquaredError()
factorizer.compile(optimizer, loss_fn, metrics=[rmse])

# Single train_on_batch call before summary() so the model's weights exist.
# NOTE(review): this hands train_on_batch a whole batched Dataset rather than a
# single (x, y) pair — presumably just a build trigger; confirm against the
# tf.keras Model.train_on_batch API.
factorizer.train_on_batch(train_ds.batch(f_batch_size))
print(factorizer.summary())

# Model fitting
factorizer.fit(
    train_ds.batch(f_batch_size),
    epochs=f_epochs,
    validation_data=eval_ds,
)

# Test Performance of Factorizer
import random

import pandas as pd  # BUG FIX: pd is used below (read_csv, get_dummies) but was never imported
import seaborn as sn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
from sklearn.feature_selection import RFE

from EDA import EDA

# Users who were 60 days enrolled, churn in the next 30 days
df = pd.read_csv('churn_data.csv')
df = EDA.eda(df)

# =============================================================================
# Data Preprocessing
# =============================================================================

# Keep the user ids aside: they identify rows but must not enter the model.
user_identifier = df['user']
df = df.drop(columns=['user'])

# One-Hot Encoding
df.housing.value_counts()                                # inspect category balance (REPL/Spyder-cell style)
df.groupby('housing')['churn'].nunique().reset_index()   # inspect churn spread per housing value
df = pd.get_dummies(df)
df.columns                                               # inspect resulting dummy columns
# Drop the dummy columns that encode "missing" for each one-hot-encoded categorical.
df = df.drop(columns=['housing_na', 'zodiac_sign_na', 'payment_type_na'])
.set("spark.driver.memory", "40g") sc = SparkContext(conf=conf) sc.addPyFile("/home/tongxialiang/workspace/lixj/SPC-POSM/SPC-POSM.zip") with open('../midResult/edaResult.txt', 'w') as f: for i in range(20): print i, 'th instance ...' # po is data contains informantion about PROVIDERS and CUSTOMERS po = PO() # read providers and customers data from text po.PROVIDERS, po.CUSTOMERS = LoadDataFromText('../data/instance' + str(i) + '.txt') ''' for surrogateSizeRatio in ratioList: print "surrogate size ratio", surrogateSizeRatio eda = EDA(popSize, iterMax, blockMax, po, alpha, beta, D, surrogateFlag, surrogateSizeRatio) eda.evaluate() print "the best solution serial, fitness, mmd, sigmaCost, sigmaCapacity, sigmaDemand ", \ eda.m_BestSolution.geneSerial, eda.m_BestSolution.fitness, eda.m_BestSolution.mmd, eda.m_BestSolution.sigmaCost, eda.m_BestSolution.sigmaCapacity, eda.m_BestSolution.sigmaDemand print "---------------------------------" ''' sumAns = 0 times = 5 for _ in range(times): eda = EDA(sc, popSize, iterMax, blockMax, po, alpha, beta, D, surrogateFlag, 0) eda.evaluate() sumAns += eda.m_BestSolution.sigmaCost f.write(str(sumAns / times) + '\n')
import pandas as pd  # BUG FIX: pd is used below (read_csv, merge) but was never imported
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from math import sqrt

from EDA import EDA

# import the dataset
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
store = pd.read_csv('store.csv')

# =============================================================================
# EDA
# =============================================================================
train, test, store = EDA.eda(train, test, store)

# =============================================================================
# Feature Engineering
# =============================================================================
store.isnull().sum()   # inspect remaining missing values (REPL/Spyder-cell style)

# convert types of categorical values to numbers
store.head()
store['StoreType'] = store['StoreType'].astype('category').cat.codes
store['Assortment'] = store['Assortment'].astype('category').cat.codes
train["StateHoliday"] = train["StateHoliday"].astype('category').cat.codes

# Attach store-level features to every training row (left join keeps all train rows).
merged = pd.merge(train, store, on='Store', how='left')

# remove NaNs