def __init__(self, seed=999, n_splits=10):
    self.seed = seed
    self.n_splits = n_splits

    import sys
    sys.path.append(
        r"C:\Users\Kelvin\CloudStation\MSC COMPUTER SCIENCE\Dissertation\CODE\Dissertation\Dissertation"
    )

    from Models import Models
    self.models = Models()

    from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
    tscv = TimeSeriesSplit(n_splits=n_splits)

    from sklearn.neural_network import MLPClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
    from sklearn.naive_bayes import GaussianNB

    # GridSearchCV-wrapped variants of each estimator (with cv=tscv) were
    # previously registered here and are disabled in favour of the plain
    # estimators below, e.g.:
    # self.models.add_model(
    #     model=GridSearchCV(estimator=MLPClassifier(random_state=seed),
    #                        param_grid={}, cv=tscv),
    #     model_name='Neural Net')

    self.models.add_model(model=MLPClassifier(random_state=seed), model_name='Neural Net')
    self.models.add_model(model=KNeighborsClassifier(), model_name='KNN')
    self.models.add_model(model=SVC(kernel='linear', random_state=seed), model_name='Linear SVM')
    self.models.add_model(model=SVC(kernel='rbf', random_state=seed), model_name='RBF SVM')
    self.models.add_model(model=GaussianProcessClassifier(random_state=seed), model_name='Gaussian Process')
    self.models.add_model(model=DecisionTreeClassifier(random_state=seed), model_name='Decision Tree')
    self.models.add_model(model=RandomForestClassifier(random_state=seed), model_name='Random Forest')
    self.models.add_model(model=AdaBoostClassifier(random_state=seed), model_name='AdaBoost')
    self.models.add_model(model=GaussianNB(), model_name='Naive Bayes')
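# The `Models` container used above comes from a project-local module that is
# not shown here. A minimal sketch of the registry interface it assumes (a
# dict keyed by model_name) might look like the following; the class and
# attribute names are illustrative assumptions, not the project's actual code.
class ModelsRegistrySketch:
    def __init__(self):
        self.registry = {}  # model_name -> (unfitted) estimator

    def add_model(self, model, model_name):
        # Register an estimator under a human-readable key.
        self.registry[model_name] = model

    def fit_all(self, X, y):
        # Fit every registered estimator on the same training data.
        for model in self.registry.values():
            model.fit(X, y)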
def train_model(new_data, save_model=True):
    # Declare objects.
    Data_preparation = data_preparation()
    models = Models()

    if new_data:
        # Read and label the raw data, then persist the processed frame.
        data = Data_preparation.read_data_add_labels()
        add_article_topic_col(data)
        data = Data_preparation.add_full_text(data)
        data = Data_preparation.add_binary_topics_col(data)
        data.to_csv('new_data/new_processed_data.csv')
    else:
        data = pd.read_csv('new_data/new_processed_data.csv', index_col=0)

    # For fast debugging:
    # data = data.sample(n=1000)

    train, test = train_test_split(data, test_size=0.1)
    train1, train2 = train_test_split(train, test_size=0.5)

    # Train the naive Bayes model, then a logistic regression on top of its
    # zero/one output matrix.
    nb_model_obj = models.train_NB_model(train1)
    zero_one_train_matrix = Data_preparation.create_zero_one_matrix(nb_model_obj, train2)
    lr_model_obj = models.train_lr_model(zero_one_train_matrix, train2['LABEL'])

    # Save the models.
    if save_model:
        nb_pkl_filename = 'nb_pickle_model.pkl'
        with open(nb_pkl_filename, 'wb') as file:
            pickle.dump(nb_model_obj, file)
        lr_pkl_filename = 'lr_pickle_model.pkl'
        with open(lr_pkl_filename, 'wb') as file:
            pickle.dump(lr_model_obj, file)

    predict_obj = Predict(nb_model_obj, lr_model_obj)
    nb_prediction = predict_obj.nb_predict(test, Data_preparation)
    print('test nb score: ' + str(np.mean(nb_prediction == test['LABEL'])))
    lr_proba, lr_prediction = predict_obj.lr_predict(test, Data_preparation)
    print('test lr score: ' + str(np.mean(lr_prediction == test['LABEL'])))

    predict_obj.get_confusion_matrix(test['LABEL'], lr_prediction, 'all')
    quantile_data, quantile_accurate = predict_obj.get_quantile_accurate(test, lr_prediction, lr_proba)
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        print(quantile_accurate)

    # TODO: add a confusion matrix for each probability band.
    for index, row in quantile_accurate.iterrows():
        print(row['probaBand'])
        quantile = quantile_data[quantile_data['probaBand'] == row['probaBand']]
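# For completeness, the two pickled estimators saved above can be restored at
# prediction time with pickle.load. A minimal sketch (file names taken from
# the snippet; error handling omitted):
import pickle

with open('nb_pickle_model.pkl', 'rb') as f:
    nb_model_obj = pickle.load(f)
with open('lr_pickle_model.pkl', 'rb') as f:
    lr_model_obj = pickle.load(f)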
def test_challenge5(self):
    self.driver.get("https://www.copart.com")
    s = CopartSearchBar(self.driver)
    model = "porsche"
    s.search_input(model)

    # Switch the results table to show 100 entries per page.
    entryNumber100 = self.driver.find_element(
        By.XPATH, "//*[@id='serverSideDataTable_length']//option[3]")
    entryNumber100.click()

    # Wait until the 100th row is rendered before counting.
    WebDriverWait(self.driver, 10).until(
        expected_conditions.visibility_of_element_located(
            (By.XPATH, "//*[@id='serverSideDataTable']/tbody/tr[100]")))

    m = Models(self.driver)
    m.unique_model_counter(100)
    d = Damages(self.driver)
    d.damage_finder(100)
def __init__(self, formal, caps, group):
    # Load the dataset.
    data = DatasetLoader()

    # Create and train the models.
    modelz = Models(data)
    # modelz.showPerformances()
    modelz.setSingleModel()  # this will set the multiNB model

    # Wait for the models to finish loading.
    while not modelz.endLoading:
        time.sleep(1)

    # Initialize the chat and run the dialogs.
    chat = ChatManager(modelz, group, formal, caps)
    chat.run()
def Classify(X, Y, cls, rep, k=5000):
    # Start moment.
    Start_moment = time.time()

    title = 'Classifying with {} and {} k={}'.format(cls, rep, k)
    print(title)

    if 'w2v' in rep:
        # Reuse the cached word2vec representations.
        train_x = load(open('w2v_rep/{}_train_x.pkl'.format(rep), 'rb'))
        train_y = load(open('w2v_rep/{}_train_y.pkl'.format(rep), 'rb'))
        test_x = load(open('w2v_rep/{}_test_x.pkl'.format(rep), 'rb'))
        test_y = load(open('w2v_rep/{}_test_y.pkl'.format(rep), 'rb'))
    else:
        X_train, X_test, y_train, y_test = train_test_split(
            X, Y, test_size=0.2, random_state=123, stratify=Y)
        train_x, train_y, test_x, test_y = Representations().get_representation(
            rep=rep, train_x=X_train, train_y=y_train,
            test_x=X_test, test_y=y_test, k=k, cat=None)

        # Oversample the minority class.
        sm = SMOTE(sampling_strategy='minority', random_state=None)
        train_x, train_y = sm.fit_resample(train_x, train_y)  # fit_sample was renamed in imbalanced-learn

        # dump(train_x, open('w2v_rep/{}_train_x.pkl'.format(rep), 'wb'))
        # dump(train_y, open('w2v_rep/{}_train_y.pkl'.format(rep), 'wb'))
        # dump(test_x, open('w2v_rep/{}_test_x.pkl'.format(rep), 'wb'))
        # dump(test_y, open('w2v_rep/{}_test_y.pkl'.format(rep), 'wb'))
        # return

    classifier = Models().get_classifier(cls)
    classifier.fit(train_x, train_y)
    pred = classifier.predict(test_x)

    # report = classification_report(test_labels, test_pred, target_names=['Contrário', 'Favorável'] if plb == 'polaridade' else ['neutro', 'opiniao'])
    report = classification_report(test_y, pred, target_names=['no', 'yes'])
    print(report)

    Finish_moment = time.time()
    tm = "It took " + str(Finish_moment - Start_moment) + " seconds"
    print(tm)
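# Sanity check on the oversampling step above: fit_resample should equalize
# the minority class count. A minimal self-contained example with synthetic
# data (all names here are local to the example):
from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.datasets import make_classification

X_demo, y_demo = make_classification(n_samples=200, weights=[0.9, 0.1], random_state=0)
print(Counter(y_demo))   # imbalanced, roughly {0: 180, 1: 20}
X_res, y_res = SMOTE(sampling_strategy='minority', random_state=0).fit_resample(X_demo, y_demo)
print(Counter(y_res))    # balanced, roughly {0: 180, 1: 180}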
def test():
    models = Models()
    exp = RegressionExperiment("med_random")
    exp.predict(FeaturePipeline.ngram_reg(models.get("svr"), 1))
if args.method.lower() in ['sorecgatitem']:
    dataset = SocialItem_Dataset(args)
elif args.method.lower() in ['sorecgatuser']:
    dataset = SocialUser_Dataset(args)
else:
    dataset = Dataset(args)

params = Parameters(args, dataset)
print("""Load data done [%.1f s]. #user:%d, #item:%d, #dom:%d, #train:%d, #test:%d, #valid:%d"""
      % (time() - t1, params.num_users, params.num_items, params.num_doms,
         params.num_train_instances, params.num_test_instances,
         params.num_valid_instances))
print('Method: %s' % (params.method))

if params.method in ['sorecgatitem', 'sorecgatuser']:
    model = Models(params)
    model.define_model()
    model.define_loss('all')

print("Model definition completed: in %.2fs" % (time() - t1))

train_step = get_optimizer(params.learn_rate, params.optimizer).minimize(model.loss)
init = tf.global_variables_initializer()

config = tf.ConfigProto()
config.gpu_options.allow_growth = True

print('train instances: {}'.format(params.train_matrix.nnz))
error_plot = Error_plot(save_flag=True, res_path=params.result_path,
                        args_str=args_str, args=args)
def execute(self):
    # Parameters
    epsilon = .5           # exploration
    epsilon_decay = 0.95
    epsilon_min = 0.1
    epoch = 4000           # number of cycles...
    max_memory = 2000      # NEEDS TO BE AS BIG AS AT LEAST 1 TRADING DAY!!!
    batch_size = 50        # 50
    sequence_length = 250  # 500
    discount = 0.95
    training_days = 1
    testing_days = 1

    features_list = list(range(1, 33))  # FULL
    features_list = list(range(1, 6))   # SHORT!! (debug override of the full list)

    training_store = ds.DataStore(training_days=training_days,
                                  features_list=features_list,
                                  sequence_length=sequence_length)
    features_length = training_store.get_features_length()

    env = Trading(data_store=training_store,
                  sequence_length=sequence_length,
                  features_length=features_length)
    num_actions = env.get_action_count()  # [sell, buy, flat] # get from Trading!!

    #testing_store = ds.DataStore(training_days=training_days, testing_days=10,
    #                             features_list=features_list, sequence_length=sequence_length)

    mo = Models()
    rms = RMSprop(lr=0.0001, rho=0.9, epsilon=1e-06)

    use_ufcnn = True
    if use_ufcnn:
        model = mo.model_ufcnn_concat(sequence_length=sequence_length,
                                      features=features_length,
                                      nb_filter=15,
                                      filter_length=5,
                                      output_dim=num_actions,
                                      optimizer=rms,
                                      loss='mse',
                                      batch_size=batch_size,
                                      init="normal")
        base_model_name = "ufcnn"
    else:
        model = mo.atari_conv_model(output_dim=num_actions,
                                    features=features_length,
                                    loss='mse',
                                    sequence_length=sequence_length,
                                    optimizer=rms,
                                    batch_size=batch_size,
                                    init="normal")
        base_model_name = "atari"

    testing_store = ds.DataStore(training_days=training_days,
                                 testing_days=testing_days,
                                 features_list=features_list,
                                 sequence_length=sequence_length,
                                 mean=training_store.mean,
                                 std=training_store.std)
    test_env = Trading(data_store=testing_store,
                       sequence_length=sequence_length,
                       features_length=features_length)

    #model = mo.atari_conv_model(regression=False, output_dim=num_actions, features=features_length, nb_filter=50,
    #                            loss='mse', sequence_length=sequence_length, optimizer=rms, batch_size=batch_size)

    # If you want to continue training from a previous model, just uncomment the line below:
    #mo.load_model("ufcnn_rl_training")

    # Define environment/game.
    # Initialize the experience-replay object.
    start_time = time.time()
    best_pnl = -99999.
    best_rndless_pnl = -99999.
    exp_replay = ExperienceReplay(max_memory=max_memory, env=env,
                                  sequence_dim=(sequence_length, features_length),
                                  discount=discount)
    lineindex = 0

    # Train
    for e in range(epoch):
        loss = 0.
        game_over = False
        total_reward = 0
        win_cnt = 0
        loss_cnt = 0
        random_cnt = 0
        no_random_cnt = 0

        # Loop over trading days...
        for i in range(training_days):
            input_t = env.reset()
            j = 0
            while not game_over:  # game_over ... end of trading day...
                input_tm1 = input_t
                #print("INPUT ", input_tm1)

                # Get the next action (epsilon-greedy).
                if np.random.rand() <= epsilon:
                    action = np.random.randint(0, num_actions, size=1)[0]
                    random_cnt += 1
                else:
                    q = model.predict(exp_replay.resize_input(input_tm1))
                    action = np.argmax(q[0])
                    no_random_cnt += 1

                # Apply the action, get the reward and the new state.
                input_t, reward, game_over, idays, lineindex = env.act(action)
                if reward > 0:
                    win_cnt += 1
                if reward < 0:
                    loss_cnt += 1
                total_reward += reward

                # Clip the reward to [-1, 1].
                if reward > 1.:
                    reward = 1.
                if reward < -1.:
                    reward = -1.

                # Store the experience.
                exp_replay.remember([action, reward, idays, lineindex - 1], game_over)

                # Adapt the model, but do not run the replay while the store is empty.
                if j > batch_size:
                    inputs, targets = exp_replay.get_batch(model, batch_size=batch_size)
                    curr_loss = model.train_on_batch(exp_replay.resize_input(inputs), targets)
                    loss += curr_loss
                j += 1

        rndless_pnl = self.get_randomless_pnl(test_env=test_env, model=model,
                                              testing_days=testing_days)
        secs = time.time() - start_time
        print("Epoch {:05d}/{} | Time {:7.1f} | Loss {:11.4f} | Win trades {:5d} | "
              "Loss trades {:5d} | Total PnL {:8.2f} | Rndless PnL {:8.2f} | "
              "Eps {:.4f} | Rnd: {:5d}| No Rnd: {:5d}"
              .format(e, epoch, secs, loss, win_cnt, loss_cnt, total_reward,
                      rndless_pnl, epsilon, random_cnt, no_random_cnt),
              flush=True)

        if epsilon > epsilon_min:
            epsilon *= epsilon_decay

        # Save trained model weights and architecture; this will be used by the
        # visualization code.
        if total_reward > best_pnl:
            mo.save_model(model, base_model_name + "_rl_best")
            best_pnl = total_reward
        else:
            mo.save_model(model, base_model_name + "_rl_training")

        if rndless_pnl > best_rndless_pnl:
            mo.save_model(model, base_model_name + "_rl_rndless_best")
            best_rndless_pnl = rndless_pnl
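# The action selection above is standard epsilon-greedy. Isolated as a helper
# (a sketch; model.predict is assumed to return a (1, num_actions) Q-value array):
import numpy as np

def epsilon_greedy_action(model, state, num_actions, epsilon):
    """Return a random action with probability epsilon, else the greedy one."""
    if np.random.rand() <= epsilon:
        return np.random.randint(0, num_actions)
    q = model.predict(state)  # shape (1, num_actions) assumed
    return int(np.argmax(q[0]))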
def initModels(self):
    self.corp = Models()
    self.corp.createLetterModel()
    self.corp.createWordUniGramModel()
def Classify(X, Y, cls, rep, k=5000):
    # Start moment.
    Start_moment = time.time()

    title = 'Classifying with {} and {} k={}'.format(cls, rep, k)
    print(title)

    # Create the K-fold cross validator.
    K_fold = KFold(n_splits=10, shuffle=True)

    # Labels and predictions, accumulated across folds.
    test_labels = np.array([], 'int32')
    test_pred = np.array([], 'int32')

    # Confusion matrix, accumulated across folds.
    confusion = np.array([[0, 0], [0, 0]])

    # The test
    for train_indices, test_indices in K_fold.split(X):
        print('Running .... =)')
        X_train = [X[i] for i in train_indices]
        Y_train = [Y[i] for i in train_indices]
        X_test = [X[i] for i in test_indices]
        Y_test = [Y[i] for i in test_indices]

        train_x, train_y, test_x, test_y = Representations().get_representation(
            rep=rep, train_x=X_train, train_y=Y_train,
            test_x=X_test, test_y=Y_test, k=k, cat=None)

        # c = Counter(Y_train)
        # print(Counter(train_y))
        # print({1: c.most_common(1)[0][1], 0: c.most_common(1)[0][1], 2: c.most_common(1)[0][1]})

        # Oversample the minority class within the fold.
        sm = SMOTE(sampling_strategy='minority', random_state=None)
        # sm = SMOTE(sampling_strategy={1: c.most_common(1)[0][1], 0: c.most_common(1)[0][1], 2: c.most_common(1)[0][1]}, random_state=None)
        train_x, train_y = sm.fit_resample(train_x, train_y)  # fit_sample was renamed in imbalanced-learn

        test_labels = np.append(test_labels, Y_test)

        classifier = Models().get_classifier(cls)
        classifier.fit(train_x, train_y)
        pred = classifier.predict(test_x)
        test_pred = np.append(test_pred, pred)

        confusion += confusion_matrix(test_y, pred)

    # report = classification_report(test_labels, test_pred, target_names=['Contrário', 'Favorável'] if plb == 'polaridade' else ['neutro', 'opiniao'])
    report = classification_report(test_labels, test_pred, target_names=['no', 'yes'])
    print(report)
    print("Confusion matrix:")
    print(confusion)

    Finish_moment = time.time()
    tm = "It took " + str(Finish_moment - Start_moment) + " seconds"
    print(tm)
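# Per-fold confusion matrices can be summed, as above, because each cell is a
# raw count. A tiny self-contained check (the labels and predictions are made up):
import numpy as np
from sklearn.metrics import confusion_matrix

y_true_folds = [[0, 1, 1], [1, 0, 0]]
y_pred_folds = [[0, 1, 0], [1, 0, 1]]
total = np.array([[0, 0], [0, 0]])
for yt, yp in zip(y_true_folds, y_pred_folds):
    total += confusion_matrix(yt, yp, labels=[0, 1])
print(total)  # aggregated counts over both folds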
def test_makeModels(self):
    m1 = Models()
    self.assertEqual(len(m1.modelList), 6)
__author__ = 'Placinta'

from Models import Models

encrypted_string = ("Esp qtcde nzyqpcpynp zy esp ezatn zq Lcetqtntlw "
                    "Tyepwwtrpynp hld spwo le Olcexzfes Nzwwprp ty estd jplc").upper()
new_string = ''
count = len(encrypted_string)
possibilities = []

corp = Models()
corp.createWordUniGramModel()

# Try every shift (26 would suffice; values above 26 repeat modulo 26).
for i in range(1, 37):
    character_list = []
    new_string = ""
    for j in range(0, count):
        if encrypted_string[j] == ' ':
            character_list.append(' ')
        else:
            # Rotate the letter by i positions within A..Z (0x41 == ord('A')).
            character_list.append(chr((ord(encrypted_string[j]) - 0x41 + i) % 26 + 0x41))
    new_string = "".join(character_list).lower()
    possibilities.append(new_string)
    #print "{0}:".format(i)

# Pick the candidate with the highest unigram probability.
max_prob = -10000
max_string = ''
for string in possibilities:
    probability = corp.getWordProbability(string)
    if probability > max_prob:
        max_prob = probability
        max_string = string
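# Worked example of the rotation above: since 0x41 is ord('A'), the expression
# maps an uppercase letter to its Caesar shift by i positions. A shift of 15
# sends 'E' to 'T', which is the shift that decodes "Esp qtcde ..." to "The first ...".
assert chr((ord('E') - 0x41 + 15) % 26 + 0x41) == 'T'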
if es.ping():
    print('Connected to Elasticsearch')
else:
    print('Could not connect to Elasticsearch')
    sys.exit()


def remove_special_chars(text):
    '''This function removes the special chars from the text.'''
    text = str(text)
    text = re.sub('[^A-Za-z0-9]+', ' ', text)
    text = text.lower()
    return text


# Loading the Universal Sentence Encoder model.
model = Models()


def get_query_doc(query):
    query = remove_special_chars(query)
    # Getting the vector representation of the text from the model.
    query_vector = model.get_vec_rep(query)

    # Creating a document structure to search with the query. The snippet was
    # truncated here; the closing of the script_score query below follows the
    # standard cosine-similarity pattern and is an assumed completion.
    query_doc = {
        "query": {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    "source": "cosineSimilarity(params.query_vector, 'text_vector') + 1.0",
                    "params": {"query_vector": query_vector}
                }
            }
        }
    }
    return query_doc
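# Presumably the query document is then run against the index with es.search;
# a hedged sketch (the index name 'documents' is an assumption):
def search_similar(es, query_doc, index='documents', top_k=5):
    response = es.search(index=index, body=query_doc, size=top_k)
    return [(hit['_score'], hit['_source']) for hit in response['hits']['hits']]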
def main():
    N_CLASSES = 2
    PREPROCESSING1 = 0
    PREPROCESSING2 = 0
    LOAD_AUTOENCODER1 = 1
    LOAD_CLASSIFIER = 1
    LOAD_MODEL = 1
    VALIDATION_SPLIT = .1
    LABELS = ["Attacks", "Normal"]

    pd.set_option('display.expand_frame_repr', False)

    pathModels = 'models/'
    pathDataset = 'datasets/'
    path = 'KDDTrain+aggregateOneCls10Features'
    pathTest = 'KDDTest+aggregateOneCls10Features'
    testpath = 'KDDTest+'

    train = pd.read_csv(pathDataset + path + ".csv")
    test = pd.read_csv(pathDataset + pathTest + ".csv")

    pathOutputTrain = pathDataset + path + 'Numeric.csv'
    pathOutputTest = pathDataset + pathTest + 'Numeric.csv'

    listNumerical10 = [
        ' src_bytes', ' dst_bytes', ' diff_srv_rate', ' same_srv_rate',
        ' dst_host_srv_count', ' dst_host_same_srv_rate',
        ' dst_host_diff_srv_rate', ' dst_host_serror_rate']

    prp = prep(train, test)

    tic_preprocessing1 = time.time()
    if PREPROCESSING1 == 1:
        train, test = preprocessing(train, test, prp)
        train, test = scaler(train, test, listNumerical10)
        train.to_csv(pathOutputTrain, index=False)
        test.to_csv(pathOutputTest, index=False)
    else:
        train = pd.read_csv(pathOutputTrain)
        test = pd.read_csv(pathOutputTest)

    clsT, clsTest = prp.getCls()
    train_normal = train[(train[clsT] == 1)]
    print("train normal:", train_normal.shape)
    train_anormal = train[(train[clsT] == 0)]
    test_normal = test[(test[clsTest] == 1)]
    test_anormal = test[(test[clsTest] == 0)]

    train_XN, train_YN, test_XN, test_YN = prp.getXY(train_normal, test_normal)
    train_XA, train_YA, test_XA, test_YA = prp.getXY(train_anormal, test_anormal)
    train_X, train_Y, test_X, test_Y = prp.getXY(train, test)
    toc_preprocessing1 = time.time()
    time_preprocessing1 = toc_preprocessing1 - tic_preprocessing1

    print('Train data shape normal', train_XN.shape)
    print('Train target shape normal', train_YN.shape)
    print('Test data shape normal', test_XN.shape)
    print('Test target shape normal', test_YN.shape)
    print('Train data shape anormal', train_XA.shape)
    print('Train target shape anormal', train_YA.shape)
    print('Test data shape anormal', test_XA.shape)
    print('Test target shape anormal', test_YA.shape)

    # Convert class vectors to binary class matrices for softmax.
    train_Y2 = np_utils.to_categorical(train_Y, N_CLASSES)
    print("Target train shape after", train_Y2.shape)
    test_Y2 = np_utils.to_categorical(test_Y, N_CLASSES)
    print("Target test shape after", test_Y2.shape)

    callbacks_list = [
        callbacks.EarlyStopping(monitor='val_loss', min_delta=0.0001,
                                patience=6, restore_best_weights=True),
    ]

    m = Models(N_CLASSES)

    if LOAD_AUTOENCODER1 == 0:
        tic_autoencoder1 = time.time()
        print('Autoencoder only normal')
        # Autoencoder hyperparameters.
        p1 = {
            'first_layer': 60, 'second_layer': 30, 'third_layer': 10,
            'four_layer': 40, 'five_layer': 20, 'six_layer': 10,
            'batch_size': 64, 'epochs': 150,
            'optimizer': optimizers.Adam,
            'kernel_initializer': 'glorot_uniform',
            'losses': 'mse',
            'first_activation': 'tanh',
            'second_activation': 'tanh',
            'third_activation': 'tanh'}
        autoencoder = m.deepAutoEncoder(train_XN, p1)
        autoencoder.summary()
        history = autoencoder.fit(train_XN, train_XN,
                                  validation_split=VALIDATION_SPLIT,
                                  batch_size=p1['batch_size'],
                                  epochs=p1['epochs'],
                                  shuffle=True,
                                  callbacks=callbacks_list,
                                  verbose=1)
        printPlotAccuracy(history, 'autoencoder')
        printPlotLoss(history, 'autoencoder')
        toc_autoencoder1 = time.time()
        time_autoencoder1 = toc_autoencoder1 - tic_autoencoder1
        autoencoder.save(pathModels + 'autoencoderNormal.h5')
    else:
        print("Load autoencoder from disk")
        autoencoder = load_model(pathModels + 'autoencoderNormal.h5')
        # autoencoder.summary()

    # Train predictions.
    predictionsT = autoencoder.predict(train_X)
    mseT = np.mean(np.power(train_X - predictionsT, 2), axis=1)
    error_dfT = pd.DataFrame({'reconstruction_error': mseT})
    error_dfT['true_class'] = train_Y[clsT]
    pathOutputError = 'ErrorTraining.csv'
    error_dfT.to_csv(pathDataset + pathOutputError, index=False)

    ################# mse test #################################
    # Test predictions.
    tic_prediction_autoencoder1 = time.time()
    predictions = autoencoder.predict(test_X)
    mse = np.mean(np.power(test_X - predictions, 2), axis=1)
    toc_prediction_autoencoder1 = time.time()
    time_prediction_autoencoder1 = toc_prediction_autoencoder1 - tic_prediction_autoencoder1
    error_df = pd.DataFrame({'reconstruction_error': mse})
    error_df['true_class'] = test_Y[clsTest]
    pathOutputError = 'ErrorTest'
    error_df.to_csv(pathDataset + pathOutputError + testpath + '.csv', index=False)

    # =============================================================================
    # C2
    # =============================================================================
    pathmseTrain = 'ErrorTraining'
    pathmseTest = 'ErrorTest'
    columnNameErrorN = 'reconstruction_error'
    # prp = prep(train, test)
    mseTrain = pd.read_csv(pathDataset + pathmseTrain + '.csv')
    mseTest = pd.read_csv(pathDataset + pathmseTest + testpath + '.csv')

    pathOutputTrain = pathDataset + path + 'mse_Numeric.csv'
    pathOutputTest = pathDataset + pathTest + 'mse_Numeric.csv'

    train = pd.read_csv(pathDataset + path + ".csv")
    test = pd.read_csv(pathDataset + pathTest + ".csv")
    train[columnNameErrorN] = mseTrain[columnNameErrorN]
    test[columnNameErrorN] = mseTest[columnNameErrorN]

    tic_preprocessing = time.time()
    if PREPROCESSING2 == 1:
        train, test = preprocessing(train, test, prp)
        train, test = scaler(train, test, listNumerical10)
        train.to_csv(pathOutputTrain, index=False)
        test.to_csv(pathOutputTest, index=False)
    else:
        train = pd.read_csv(pathOutputTrain)
        test = pd.read_csv(pathOutputTest)

    clsT, clsTest = prp.getCls()
    train_normal = train[(train[clsT] == 1)]
    train_anormal = train[(train[clsT] == 0)]
    test_normal = test[(test[clsTest] == 1)]
    test_anormal = test[(test[clsTest] == 0)]

    train_XN, train_YN, test_XN, test_YN = prp.getXY(train_normal, test_normal)
    train_XA, train_YA, test_XA, test_YA = prp.getXY(train_anormal, test_anormal)
    train_X, train_Y, test_X, test_Y = prp.getXY(train, test)
    toc_preprocessing = time.time()
    time_preprocessing = toc_preprocessing - tic_preprocessing

    print('Train data shape normal', train_XN.shape)
    print('Train target shape normal', train_YN.shape)
    print('Test data shape normal', test_XN.shape)
    print('Test target shape normal', test_YN.shape)
    print('Train data shape anormal', train_XA.shape)
    print('Train target shape anormal', train_YA.shape)
    print('Test data shape anormal', test_XA.shape)
    print('Test target shape anormal', test_YA.shape)

    # Convert class vectors to binary class matrices for softmax.
    train_Y2 = np_utils.to_categorical(train_Y, N_CLASSES)
    print("Train shape after", train_X.shape)
    print("Target train shape after", train_Y2.shape)
    test_Y2 = np_utils.to_categorical(test_Y, N_CLASSES)
    print("Target test shape after", test_Y2.shape)
    print("Test shape after", test_X.shape)

    callbacks_list = [
        callbacks.EarlyStopping(monitor='val_loss', min_delta=0.0001,
                                patience=6, restore_best_weights=True),
    ]

    m = Models(N_CLASSES)

    if LOAD_MODEL == 0:
        tic_autoencoder = time.time()
        print('Autoencoder only normal')
        # Autoencoder hyperparameters.
        p1 = {
            'first_layer': 60, 'second_layer': 30, 'third_layer': 10,
            'four_layer': 40, 'five_layer': 20, 'six_layer': 10,
            'batch_size': 128, 'epochs': 150,
            'optimizer': optimizers.Adam,
            'kernel_initializer': 'glorot_uniform',
            'losses': 'mse',
            'first_activation': 'tanh',
            'second_activation': 'tanh',
            'third_activation': 'tanh'}
        autoencoder = m.deepAutoEncoder(train_XN, p1)
        autoencoder.summary()
        history = autoencoder.fit(train_XN, train_XN,
                                  validation_split=VALIDATION_SPLIT,
                                  batch_size=p1['batch_size'],
                                  epochs=p1['epochs'],
                                  shuffle=True,
                                  callbacks=callbacks_list,
                                  verbose=1)
        toc_autoencoder = time.time()
        time_autoencoder = toc_autoencoder - tic_autoencoder
        printPlotAccuracy(history, 'autoencoder')
        printPlotLoss(history, 'autoencoder')
        autoencoder.save(pathModels + 'autoencoderNormal2.h5')
    else:
        print("Load autoencoder from disk")
        autoencoder = load_model(pathModels + 'autoencoderNormal2.h5')
        #plot_model(autoencoder, to_file='autoencoder.png')

    # Scale to improve the classifier (!! change in fit !!).
    train_XS, test_XS = scaleSimple(train_X, test_X)

    print("Using softmax classifier:")
    if LOAD_CLASSIFIER == 0:
        tic_classifier = time.time()
        # Parameters for the final model.
        p2 = {
            'batch_size': 64, 'epochs': 150,
            'optimizer': optimizers.Adam,
            'kernel_initializer': 'glorot_uniform',
            'losses': 'binary_crossentropy',
            'first_activation': 'tanh',
            'second_activation': 'tanh',
            'third_activation': 'tanh'}
        # model = m.modelWeightFixed(encoder, train_X, p2, encoder2)
        # class_weight = {0: 3, 1: 1}
        model = m.baselineModel(train_XS, p2)
        history3 = model.fit(train_XS, train_Y2,
                             # validation_data=(test_X, test_Y2),
                             validation_split=VALIDATION_SPLIT,
                             batch_size=p2['batch_size'],
                             epochs=p2['epochs'],
                             shuffle=False,
                             callbacks=callbacks_list,
                             # class_weight=class_weight,
                             verbose=1)
        toc_classifier = time.time()
        time_classifier = toc_classifier - tic_classifier
        printPlotAccuracy(history3, 'finalModel1')
        printPlotLoss(history3, 'finalModel1')
        model.save(pathModels + 'modelsoftmax2.h5')
    else:
        print("Load softmax from disk")
        model = load_model(pathModels + 'modelsoftmax2.h5')

    model.summary()
    #plot_model(model, to_file='model.png')

    ################# mse train ###########################
    # Train predictions.
    predictionsT = autoencoder.predict(train_X)
    pathOutputErrorT = 'ErrorTrain2.csv'
    mseT = np.mean(np.power(train_X - predictionsT, 2), axis=1)
    error_dfT = pd.DataFrame({'reconstruction_error': mseT})
    error_dfT['true_class'] = train_Y[clsT]
    error_dfT.to_csv(pathDataset + pathOutputErrorT)

    ################# mse test #############################
    # Test predictions.
    pathOutputErrorTest = 'ErrorTest2'
    tic_prediction_autoencoder = time.time()
    predictions = autoencoder.predict(test_X)
    mse = np.mean(np.power(test_X - predictions, 2), axis=1)
    toc_prediction_autoencoder = time.time()
    time_prediction_autoencoder1 = toc_prediction_autoencoder - tic_prediction_autoencoder
    error_df = pd.DataFrame({'reconstruction_error': mse})
    error_df['true_class'] = test_Y[clsTest]

    ################# classifier prediction #################
    tic_prediction_classifier = time.time()
    predictions = model.predict(test_XS)
    toc_prediction_classifier = time.time()
    time_prediction_classifier = toc_prediction_classifier - tic_prediction_classifier
    predictionsT = model.predict(train_XS)

    ############# create confusion matrix ######################
    # Predicting the training set results.
    y_predT = np.argmax(predictionsT, axis=1)
    cm = confusion_matrix(train_Y, y_predT)
    acc = accuracy_score(train_Y, y_predT, normalize=True)
    print('Softmax on training set')
    print(cm)
    print(acc)

    # Add the prediction to the dataframe with the reconstruction error.
    error_dfT['predict_softmax'] = y_predT
    error_dfT.to_csv(pathDataset + pathOutputErrorT, index=False)

    # Predicting the test set results.
    # prob = np.amax(predictions, axis=1)
    # print(prob)
    y_pred = np.argmax(predictions, axis=1)
    print(y_pred)
    cm = confusion_matrix(test_Y, y_pred)
    acc = accuracy_score(test_Y, y_pred, normalize=True)
    print('Softmax on test set')
    print(cm)
    print(acc)

    # Add the prediction to the dataframe with the reconstruction error.
    error_df['predict_softmax'] = y_pred
    # error_df['prob'] = prob
    error_df.to_csv(pathDataset + pathOutputErrorTest + testpath + '.csv', index=False)

    ######################### Phase after classification ##############################
    # Keep only the rows the classifier predicted as normal (1).
    error_OnlyNormal = error_df[error_df['predict_softmax'] == 1]
    # error_OnlyNormalT = error_dfT[error_dfT['predict_softmax'] == 1]
    # error_OnlyNormalT.to_csv("onlyNormal2.csv", index=False)

    threshold = 0.002
    tic_prediction_anomaly1 = time.time()
    y_predA = [0 if e > threshold else 1
               for e in error_df.reconstruction_error.values]
    toc_prediction_anomaly1 = time.time()
    time_prediction_anomaly1 = toc_prediction_anomaly1 - tic_prediction_anomaly1

    conf_matrix = confusion_matrix(error_df.true_class, y_predA)
    plt.figure(figsize=(12, 12))
    sns.heatmap(conf_matrix, xticklabels=LABELS, yticklabels=LABELS,
                annot=True, fmt="d")
    plt.title("Confusion matrix All")
    plt.ylabel('True class')
    plt.xlabel('Predicted class')
    plt.savefig("first matrix")
    plt.show()
    plt.close()

    tic_prediction_anomaly2 = time.time()
    y_predNormal = [0 if e > threshold else 1
                    for e in error_OnlyNormal.reconstruction_error.values]
    toc_prediction_anomaly2 = time.time()
    time_prediction_anomaly2 = toc_prediction_anomaly2 - tic_prediction_anomaly2

    conf_matrix2 = confusion_matrix(error_OnlyNormal.true_class, y_predNormal)
    print(conf_matrix2)
    plt.figure(figsize=(12, 12))
    sns.heatmap(conf_matrix2, xticklabels=LABELS, yticklabels=LABELS,
                annot=True, fmt="d")
    plt.title("Confusion matrix Normal")
    plt.ylabel('True class')
    plt.xlabel('Predicted class')
    plt.savefig("second matrix")
    plt.show()

    if PREPROCESSING1 == 1:
        print("Time for preprocessing 1 %s " % time_preprocessing1)
    if PREPROCESSING2 == 1:
        print("Time for preprocessing 2 %s " % time_preprocessing)
    if LOAD_AUTOENCODER1 == 0:
        print("Time for train autoencoder 1 %s " % time_autoencoder1)
    if LOAD_MODEL == 0:
        print("Time for train autoencoder 2 %s " % time_autoencoder)
    if LOAD_CLASSIFIER == 0:
        print("Time for train classifier %s " % time_classifier)
    print("Time for anomaly prediction %s " %
          (time_prediction_autoencoder1 + time_prediction_anomaly1))
    print("Time for classifier prediction %s " % time_prediction_classifier)
    print("Time for 2 phase prediction %s " %
          (time_prediction_autoencoder1 + time_prediction_classifier +
           time_prediction_autoencoder1 + time_prediction_anomaly2))
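# The 0.002 threshold above is hard-coded. A common alternative (a sketch,
# not what this code does) is to derive it from a high quantile of the
# reconstruction error the autoencoder achieves on *normal* training traffic:
import numpy as np

normal_errors = error_dfT.loc[error_dfT['true_class'] == 1, 'reconstruction_error']
threshold = np.percentile(normal_errors, 99)  # flag the worst 1% as anomalous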
from Models import Models
import generator as gen

data_gen_args = dict(rotation_range=0.2,
                     width_shift_range=0.05,
                     height_shift_range=0.05,
                     shear_range=0.05,
                     zoom_range=0.05,
                     horizontal_flip=True,
                     fill_mode='nearest')

path = 'data/shapes/'
myGene = gen.trainGenerator(2, path + 'train', 'image', 'label',
                            data_gen_args, save_to_dir=None)

model = Models(input_size=(256, 256, 1), model='unet',
               modelPath='unet_shapes.hdf5')
model.train(myGene, steps=10, epochs=2)
model.predict_images(path + 'test/')
config_file_path = results_folder + '/params.ini'
args = train_utils.parse_params(config_file_path)
L = args.num_classes
args.image_dim = [128, 128, 3]

#%% load saved network parameters and open a new session
tf.reset_default_graph()
in_placeholder = tf.placeholder(
    tf.float32, shape=[None, None, None, L + args.image_dim[2]],
    name="in_placeholder")
out_placeholder = tf.placeholder(tf.float32, shape=[None, None, None, L],
                                 name='out_placeholder')
phase = tf.placeholder(tf.bool, name='phase')

net_class = Models(args)
net_class.build_model(in_placeholder, phase)

sess = tf.Session()
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
ckpt = tf.train.get_checkpoint_state(checkpoints_path)
if ckpt and ckpt.model_checkpoint_path:
    ckpt_path = checkpoints_path + 'my_model-' + str(checkpoint[0])
    saver.restore(sess, ckpt_path)

#%%
if not os.path.exists(output_path):
    os.makedirs(output_path)
image_list = sorted(os.listdir(pascal_path + '/images'))
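# With the checkpoint restored, inference presumably runs by feeding the
# placeholders above; a hedged sketch. The prediction tensor exposed by
# Models is not shown in this snippet, so it is passed in explicitly rather
# than assumed to have a particular attribute name:
def run_inference(sess, output_tensor, batch):
    # batch: np.ndarray of shape [N, H, W, L + 3], matching in_placeholder.
    return sess.run(output_tensor,
                    feed_dict={in_placeholder: batch, phase: False})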
def getModels(client):
    saveTickDateInString = Properties.SAVE_TICKDATE_IN_STRING
    models = Models(client, saveTickDateInString=saveTickDateInString)
    return models
def __init__(self):
    # super(self).__init__()
    # Initialize the classifiers.
    self.models = Models()
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
test_mkdir(args.save_path)

if args.modelname == 'mnist_2nn' or args.modelname == 'mnist_cnn':
    datasetname = 'mnist'
    with tf.variable_scope('inputs') as scope:
        inputsx = tf.placeholder(tf.float32, [None, 784])
        inputsy = tf.placeholder(tf.float32, [None, 10])
elif args.modelname == 'cifar10_cnn':
    datasetname = 'cifar10'
    with tf.variable_scope('inputs') as scope:
        inputsx = tf.placeholder(tf.float32, [None, 24, 24, 3])
        inputsy = tf.placeholder(tf.float32, [None, 10])

myModel = Models(args.modelname, inputsx)
predict_label = tf.nn.softmax(myModel.outputs)

with tf.variable_scope('loss') as scope:
    # Per-example cross-entropy. Note tf.log(0) is possible if the softmax
    # saturates; a small epsilon inside the log would guard against that.
    Cross_entropy = -tf.reduce_mean(inputsy * tf.log(predict_label), axis=1)

with tf.variable_scope('train') as scope:
    optimizer = tf.train.GradientDescentOptimizer(args.learning_rate)
    train = optimizer.minimize(Cross_entropy)

with tf.variable_scope('validation') as scope:
    correct_prediction = tf.equal(tf.argmax(predict_label, axis=1),
                                  tf.argmax(inputsy, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

saver = tf.train.Saver(max_to_keep=3)
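# A minimal sketch of how one SGD step would be driven through this graph;
# x_batch and y_batch are assumed to come from the dataset loader, which is
# not shown here:
def training_step(sess, x_batch, y_batch):
    # Run one gradient-descent update and return the per-example loss values.
    _, loss_vals = sess.run([train, Cross_entropy],
                            feed_dict={inputsx: x_batch, inputsy: y_batch})
    return loss_vals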
dataset = Dataset(args)
params = Parameters(args, dataset)
print("""Load data done [%.1f s]. #user:%d, #list:%d, #item:%d, #train:%d, #valid:%d, #test:%d"""
      % (time() - t1, params.num_user, params.num_list, params.num_item,
         params.num_train_instances, params.num_valid_instances,
         params.num_test_instances))

args.args_str = params.get_args_to_string()
t1 = time()
print("args str: ", args.args_str)
print("length from list_items_list: ", len(utils.get_value_lists_as_list(params.list_items_dct)))
print("length from trainArrTriplets: ", len(params.trainArrTriplets[0]))
print("non-zero entries in train_matrix: ", params.train_matrix.nnz)

# model-loss-optimizer definition ==================================================
models = Models(params, device=device)
model = models.get_model()

if params.loss not in ['bpr']:
    criterion_li = torch.nn.BCELoss()
    # criterion_li = torch.nn.BCEWithLogitsLoss()  # new change made

if params.optimizer == 'adam':
    optimizer_gnn = torch.optim.Adam(model.parameters(), lr=params.lr)
    optimizer_seq = torch.optim.Adam(model.parameters(), lr=params.lr)
elif params.optimizer == 'rmsprop':
    optimizer_gnn = torch.optim.RMSprop(model.parameters(), lr=params.lr)
    optimizer_seq = torch.optim.RMSprop(model.parameters(), lr=params.lr)

model.to(device)

# training =======================================================================
## param =============================
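# The usual PyTorch update cycle for the optimizers defined above would look
# roughly like this sketch; how the model consumes its batch is an assumption,
# since the forward signature is not shown in the snippet:
def train_batch(model, criterion, optimizer, batch_inputs, batch_labels):
    optimizer.zero_grad()
    scores = model(*batch_inputs)           # hypothetical forward call
    loss = criterion(scores, batch_labels)
    loss.backward()
    optimizer.step()
    return loss.item()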
testing_store = ds.DataStore(training_days=training_days,
                             testing_days=testing_days,
                             features_list=features_list,
                             sequence_length=sequence_length,
                             mean=training_store.mean,
                             std=training_store.std)
features_length = training_store.get_features_length()

env = Trading(data_store=testing_store,
              sequence_length=sequence_length,
              features_length=features_length)
num_actions = env.get_action_count()  # [sell, buy, flat] # get from Trading!!

mo = Models()
start_time = time.time()
best_pnl = -99999.

exp_replay = ExperienceReplay(max_memory=max_memory, env=env,
                              sequence_dim=(sequence_length, features_length))

if len(sys.argv) == 2:
    model_name = sys.argv[1]
else:
    model_name = None

if model_name is not None:
    model = mo.load_model(model_name)
from Models import Models
from api import get_model_name

models = Models()
model_ids = models.model_ids

model_names = []
for model_id in model_ids:
    model_names.append((model_id, get_model_name(model_id)))

models.write_model_names(model_names)