def main():
    #model = load_model('test_models/stock_multb_nosmooth_step4_1525242340/seq_len_50/model-1.h5')
    # Raw string so the Windows backslashes are not treated as escape sequences
    model = load_model(
        r'D:\Source\Repos\StockPrediction\stock_single_nosmooth_1525405252\seq_len_50/model-1.h5'
    )
    seq_len = 50
    predict_len = 7
    #date_ranges = [(datetime.date(2016,11,1),datetime.date(2017,4,3)),(datetime.date(2002,3,18),datetime.date(2002,8,14)),(datetime.date(2015,3,8),datetime.date(2015,8,5))]
    date_ranges = [(datetime.date(2015, 1, 1), datetime.date(2015, 6, 1)),
                   (datetime.date(2016, 1, 1), datetime.date(2016, 6, 1)),
                   (datetime.date(2017, 1, 1), datetime.date(2017, 6, 1))]
    test_data = [
        dataload.load_data('daily_spx.csv',
                           seq_len,
                           normalise_window=True,
                           smoothing=False,
                           date_range=date_range,
                           train=False) for date_range in date_ranges
    ]
    predictions = [
        dataload.predict_sequences_multiple(model, test[0], seq_len,
                                            predict_len) for test in test_data
    ]
    scores = [model.evaluate(test[0], test[1], verbose=0) for test in test_data]
    # Denormalize each predicted sequence against the first price of its window
    for prediction_index in range(len(predictions)):
        for sequence_index in range(len(predictions[prediction_index])):
            predictions[prediction_index][sequence_index] = dataload.denormalize_sequence(
                test_data[prediction_index][2][sequence_index * 7],  # 7 == predict_len
                predictions[prediction_index][sequence_index])
    # Denormalize the ground-truth y values
    for test_data_index in range(len(test_data)):
        for y_index in range(len(test_data[test_data_index][1])):
            test_data[test_data_index][1][y_index] = dataload.denormalize_point(
                test_data[test_data_index][2][y_index],
                test_data[test_data_index][1][y_index])
    model_plot = [(predictions[0], '2015'), (predictions[1], '2016'),
                  (predictions[2], '2017')]
    #model_plot = [(predictions[0], 'Bullish'), (predictions[1], 'Bearish'), (predictions[2], 'Neutral')]
    results_fname = 'test_singleb_nosmooth_byyear_{}'.format(
        int(datetime.datetime.now().timestamp()))
    os.makedirs(results_fname)
    plot_results_multiple(model_plot, [t[1] for t in test_data],
                          predict_len,
                          fig_path=results_fname + '/plots.pdf')
    with open(results_fname + "/score.txt", "w") as fout:
        for score in scores:
            pprint.pprint(score, fout)
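# A minimal sketch of the (de)normalization the loops above rely on. It
# ASSUMES the common window scheme n_i = p_i / p_0 - 1 (each window scaled by
# its first price); the actual dataload.denormalize_point /
# denormalize_sequence may differ, so treat these as illustrative stand-ins.
def denormalize_point_sketch(p0, n):
    # Invert n = p / p0 - 1  ->  p = p0 * (n + 1)
    return p0 * (n + 1)


def denormalize_sequence_sketch(p0, normalized_seq):
    # Apply the same inversion to every point of a predicted window
    return [p0 * (n + 1) for n in normalized_seq]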
def main():
    description = 'svm_smoothed'
    results_fname = '{}_{}'.format(description, int(datetime.now().timestamp()))
    model_fname = "model.sav"
    seq_len = 50
    predict_len = 10
    X_train, y_train, X_test, y_test = dataload.load_data(
        'daily_spx.csv',
        seq_len,
        normalise_window=True,
        smoothing=True,
        smoothing_window_length=5,
        smoothing_polyorder=3,
        reshape=False)
    print('> Data Loaded')

    # Grid search over SVR hyperparameters
    param_grid = dict(
        C=np.logspace(-4, 4),
        gamma=np.logspace(-9, 3),
        kernel=['rbf', 'linear'],
    )
    grid = GridSearchCV(estimator=SVR(),
                        param_grid=param_grid,
                        scoring='neg_mean_squared_error',
                        cv=5)
    start_time = datetime.now()
    grid_result = grid.fit(X_train, y_train)
    elapsed = datetime.now() - start_time
    print('> Time elapsed: ', elapsed)
    print('> Best parameters:')
    print(grid.best_params_)
    results = pd.DataFrame(grid.cv_results_)
    results.sort_values(by='rank_test_score', inplace=True)

    # Refit the best model on the full training set
    model = SVR(kernel=grid.best_params_['kernel'],
                C=grid.best_params_['C'],
                gamma=grid.best_params_['gamma'])
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # Save results
    os.makedirs(results_fname)
    results.to_csv('{}/results.csv'.format(results_fname))
    pickle.dump(model, open('{}/{}'.format(results_fname, model_fname), 'wb'))
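# A hedged follow-up sketch: reloading the pickled SVR and scoring it on the
# held-out split. mean_squared_error is a standard scikit-learn metric; the
# path simply mirrors the save path above, everything else is an assumption.
import pickle
from sklearn.metrics import mean_squared_error


def evaluate_saved_svr(results_fname, model_fname, X_test, y_test):
    # Load the model exactly as it was dumped above, then report test MSE
    with open('{}/{}'.format(results_fname, model_fname), 'rb') as fin:
        model = pickle.load(fin)
    return mean_squared_error(y_test, model.predict(X_test))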
def main():
    svm_model_path = 'svm_smoothed_1530436270/model.sav'
    results_fname = 'test_svm_smooth_byyear_{}'.format(
        int(datetime.datetime.now().timestamp()))
    seq_len = 50
    predict_len = 7
    os.makedirs(results_fname)

    # Define date ranges
    date_ranges = [(datetime.date(2015, 1, 1), datetime.date(2015, 6, 1)),
                   (datetime.date(2016, 1, 1), datetime.date(2016, 6, 1)),
                   (datetime.date(2017, 1, 1), datetime.date(2017, 6, 1))]

    # Load data
    model = pickle.load(open(svm_model_path, 'rb'))
    test_data = [
        dataload.load_data('daily_spx.csv',
                           seq_len,
                           normalise_window=True,
                           smoothing=False,
                           date_range=date_range,
                           train=False) for date_range in date_ranges
    ]

    # Generate predictions
    #[[print(seq.shape) for seq in test_date_range[0]] for test_date_range in test_data]
    #predictions = [[np.asscalar(model.predict(seq.transpose())) for seq in test_date_range[0]] for test_date_range in test_data]
    predictions = [
        dataload.predict_sequences_multiple(model, test[0], seq_len,
                                            predict_len) for test in test_data
    ]
    for prediction_index in range(len(predictions)):
        for sequence_index in range(len(predictions[prediction_index])):
            predictions[prediction_index][sequence_index] = dataload.denormalize_sequence(
                test_data[prediction_index][2][sequence_index * 7],
                predictions[prediction_index][sequence_index])
    for test_data_index in range(len(test_data)):
        for y_index in range(len(test_data[test_data_index][1])):
            test_data[test_data_index][1][y_index] = dataload.denormalize_point(
                test_data[test_data_index][2][y_index],
                test_data[test_data_index][1][y_index])

    # Save plot
    model_plot = [(predictions[0], '2015'), (predictions[1], '2016'),
                  (predictions[2], '2017')]
    plot_results_multiple(model_plot, [t[1] for t in test_data],
                          predict_len,
                          fig_path=results_fname + '/plots.pdf')
def loadData(self):
    print("start loading data..")
    if self.analyze_type == 'expression':
        if self.exp_file is None:
            raise Exception("expression file is not specified")
        # Note: the keyword 'exp_normalize_tpye' follows dl.load_data's signature
        dataload_result = dl.load_data(exp_file=self.exp_file,
                                       gmt_file=self.gmt_file,
                                       analyzing_type=self.analyze_type,
                                       exp_normalize_tpye=self.exp_normalization_type)
    elif self.analyze_type == 'mutation':
        if self.mutation_file is None:
            raise Exception("mutation file is not specified")
        elif self.smoothing_source_file is None:
            raise Exception("smoothing source file is not specified")
        dataload_result = dl.load_data(mutation_file=self.mutation_file,
                                       gmt_file=self.gmt_file,
                                       analyzing_type=self.analyze_type,
                                       network_file_for_smoothing=self.smoothing_source_file)
    elif self.analyze_type == 'mut_with_exp':
        if self.mutation_file is None:
            raise Exception("mutation file is not specified")
        elif self.exp_file is None:
            raise Exception("expression file is not specified")
        elif self.smoothing_source_file is None:
            raise Exception("smoothing source file is not specified")
        dataload_result = dl.load_data(exp_file=self.exp_file,
                                       mutation_file=self.mutation_file,
                                       gmt_file=self.gmt_file,
                                       analyzing_type=self.analyze_type,
                                       network_file_for_smoothing=self.smoothing_source_file,
                                       exp_normalize_tpye=self.exp_normalization_type)
    else:
        raise Exception('unspecified analyzing type')
    return dataload_result
def get_AE_feats(encoder, data_frame_in, subtask, params):
    AE_feats = []
    labels = []
    ind_selected = []
    lengths = []
    for idx in data_frame_in.index:
        #print(idx)
        temp_train_Y = data_frame_in[subtask][idx]
        if np.isnan(temp_train_Y):
            print('nan label')
            continue
        temp_X = load_data(data_frame_in, idx, params)
        #temp_X = temp_X + np.random.normal(0,1,(temp_X.shape))
        temp_feats = encoder.predict(temp_X)
        lengths.append(temp_feats.shape[0])
        #temp_pad = np.zeros((max_len-temp_feats.shape[0],latent_dim))
        #temp_feats = np.concatenate((temp_feats,temp_pad),axis=0)
        #temp_feats = temp_feats.reshape(1,-1,latent_dim)
        ind_selected.append(idx)
        AE_feats.append(temp_feats)
        #temp_train_Y = to_categorical(temp_train_Y,5)
        #temp_train_Y = np.expand_dims(temp_train_Y,axis=0)
        labels.append(temp_train_Y)

    # Zero-pad every recording to the longest sequence, then add a batch axis
    max_len = np.max(lengths)
    latent_dim = temp_feats.shape[-1]
    for i in range(len(AE_feats)):
        temp_pad = np.zeros((max_len - lengths[i], latent_dim))
        AE_feats[i] = np.concatenate((AE_feats[i], temp_pad), axis=0)
        AE_feats[i] = AE_feats[i].reshape(1, -1, latent_dim)

    # Stack into single arrays
    AE_feats = np.vstack(AE_feats)
    labels = np.vstack(labels)
    ind_selected = np.array(ind_selected)
    return AE_feats, labels, ind_selected
    deconv6 = deconv_layer(conv5_4, 512, 64, 1)
    # conv6 = conv_layer2(deconv6, 64, 64, "conv6")  # 20*14
    deconv7 = deconv_layer(deconv6, 64, 8, 0)
    # conv7 = conv_layer2(deconv7, 8, 8, "conv7")  # 40*28
    deconv8 = deconv_layer(deconv7, 8, 1, 0)
    # conv8 = conv_layer2(deconv8, 1, 1, "conv8")  # 80*56
    y_ = tf.reshape(deconv8, [-1, Out_Width * Out_Height])

    # Loss function: sum of squared errors between prediction and depth target
    loss = tf.reduce_sum(tf.square(y_ - y_reshape))
    train = tf.train.AdamOptimizer(lr).minimize(loss)

    data = dataload.load_data(is_training)
    saver = tf.train.Saver()
    sess = tf.Session()
    # tf.initialize_all_variables() is deprecated in favor of this call
    sess.run(tf.global_variables_initializer())
    for step in range(TRAIN_STEP):
        images, depths = dataload.get_batch(data, BATCH_SIZE)
        # print(depths[0])
        # print(sess.run(conv8, feed_dict={x:images, y:depths}))
        # print("image:", images[8])
        if is_training:
            if step % 1 == 0:
                loss_value = sess.run(loss, feed_dict={x: images, y: depths})
# Import basic libraries and keras
import os
import json

import keras
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential, load_model
from keras.layers import Dense, Embedding, LSTM
from keras.preprocessing.sequence import pad_sequences

from dataload import load_data

# Load input data and labels (0 to 4)
train_x, train_y = load_data()

# Use the 3000 most popular words found in our dataset
max_words = 3000

# Tokenize the data
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(train_x)
dictionary = tokenizer.word_index

# Save tokenizer dictionary to file
if not os.path.exists('dictionary.json'):
    with open('dictionary.json', 'w') as outfile:
        json.dump(tokenizer.word_index, outfile)

# For each tweet, change each token to its ID in the Tokenizer's word_index
sequences = tokenizer.texts_to_sequences(train_x)
train_x = pad_sequences(sequences, maxlen=300)
train_y = keras.utils.to_categorical(train_y, 5)

# Check if there is a pre-trained model
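# A hedged sketch of the check the comment above leads into. The file name
# 'model.h5' and the layer sizes are illustrative assumptions; only the
# Tokenizer/pad_sequences setup above comes from the source.
if os.path.exists('model.h5'):
    model = load_model('model.h5')
else:
    model = Sequential()
    model.add(Embedding(max_words, 128, input_length=300))
    model.add(LSTM(64))
    model.add(Dense(5, activation='softmax'))  # 5 label classes (0 to 4)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])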
# # ypos += val
# # ax.text(rowNum, ypos, "{0:.2f}".format(val), color='black', ha='center')
# # featuer += 1
# # ypos = 0
# plt.title('Average Number of Added Features in Each Epoch')
# plt.savefig('./added_featues({0}).png'.format(max_dist))
# print(frams)

if __name__ == '__main__':
    torch.multiprocessing.freeze_support()
    train = False

    # Define what device we are using
    print("CUDA Available: ", torch.cuda.is_available())
    device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")

    # With surrogate model
    (x_malware, x_benign), test_data, feature_vectore_size, features = dataload.load_data(train)
    # (c_x_malware, c_x_benign), c_test_data, c_feature_vectore_size = dataload.load_data(train)
    # (s_x_malware, s_x_benign), s_test_data, s_feature_vectore_size = dataload.load_data(train)
    x_mal = x_malware
    adv_sample_path = './MalwareDataset/adversarial_samples_malJSMA_all/'

    # In a black-box setting
    # classifiers = ['SVM_MALJSMA_all', 'SVM_MALJSMA_all_R1', 'SVM_MALJSMA_all_R2', 'SVM_MALJSMA_all_R3', 'SVM_MALJSMA_all_R4', 'SVM_MALJSMA_all_R5', 'SVM_MALJSMA_all_R6', 'SVM_MALJSMA_all_R7', 'SVM_MALJSMA_all_R8', 'SVM_MALJSMA_all_R9', 'SVM_MALJSMA_all_R10', 'SVM_MALJSMA_all_R11', 'SVM_MALJSMA_all_R12', 'SVM_R13', 'SVM_R14', 'SVM_R15', 'SVM_R16', 'SVM_R17']  # ,'RF', 'RBF_SVM', 'LR', 'DT', 'KNN', 'MLP']  # ,'DNN']

    #### <<<<<<<< generalizability
    # classifiers = []
    # models_path = './models/SVMmodelsESORICS2020Genralization/'
    # models = [f for f in listdir(models_path) if isfile(join(models_path, f))]
    # for model in models:
    #     classifiers.append(model[0:-4])
import numpy as np
from gensim.models import Word2Vec

from dataload import load_data


def train_word2vec(data):
    # Skip-gram (sg=1), 100-dim vectors, window of 5.
    # Note: gensim < 4.0 uses size=; newer versions renamed it to vector_size=.
    model = Word2Vec(data, sg=1, size=100, window=5, min_count=1, workers=4)
    model.save("word2vec.model")


if __name__ == '__main__':
    docs, _ = load_data("data_ma.npz", 40, 25000, 700)
    train_word2vec(docs)
    model2 = Word2Vec.load("word2vec.model")
    temp = model2.wv[docs[0]]
    temp = temp[:20]
    print(temp.shape)
    temp2 = np.zeros((100, 10)).T
    print(temp2.shape)
    temp = np.insert(temp2, 0, values=temp, axis=0)
    print(temp.shape)
    # [model2[text] for text in docs]
    # print(temp)
        mse = criterion(SR, target)
        psnr = 10 * log10(1 / mse.data[0])
        avg_psnr += psnr
        print(iteration)
    print("===> Avg. SR PSNR: {:.4f} dB".format(avg_psnr / iteration))


opt = parser.parse_args()
print(opt)
gpuid = 0

print("===> Loading datasets")
root_dir = '/tmp4/hang_data/DIV2K'
test_dir = 'DIV2K_validate_HR_x4'
targets = dataload.load_data(root_dir, test_dir)
test_dir = 'DIV2K_validate_LR_x4'
inputs = dataload.load_data(root_dir, test_dir)
test_images = {"targets": targets, "inputs": inputs}

SR_dir = join(root_dir, 'SRResNet_DIV_train_x4')
if not os.path.isdir(SR_dir):
    os.mkdir(SR_dir)

model = torch.load(opt.model, map_location=lambda storage, loc: storage)["model"]
#model = torch.load(opt.model)["model"]
model = model.cuda(gpuid)
criterion = torch.nn.MSELoss(size_average=True)
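# For reference, a small helper equivalent to the PSNR computation above for
# images scaled to [0, 1] (so MAX = 1): PSNR = 10 * log10(MAX^2 / MSE).
# The helper name is ours; the snippet above inlines the same formula.
from math import log10


def psnr_from_mse(mse, max_val=1.0):
    # Peak signal-to-noise ratio in dB
    return 10 * log10((max_val ** 2) / mse)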
args.loss_function = 'Focal'  # CE, MSE, or Focal
args.base = 'RoBERTa'
args.device = 0
args.SEED = 42
args.MAX_LEN = 256
args.batch_size = 16
args.lr = 1e-4
args.adam_epsilon = 1e-8
args.epochs = 50
args.result_name = args.Senti_or_Emo + '_Mode_' + str(
    args.mode) + '_' + args.loss_function + '_Epochs_' + str(
        args.epochs) + '.csv'

## LOAD DATA
from dataload import load_data
train_length, train_dataloader, valid_dataloader, test_dataloader = load_data(
    args, DATA_PATH)
args.train_length = train_length

## TRAIN THE MODEL
from model import Emo_Generation
from transformers import RobertaConfig, RobertaModel, PreTrainedModel
from train import train_model

if args.base == 'RoBERTa':
    model = Emo_Generation.from_pretrained('roberta-base',
                                           mode=args.mode).cuda(args.device)
else:
    model = Emo_Generation.from_pretrained('bert-base-uncased',
                                           mode=args.mode).cuda(args.device)

train_model(model, args, train_dataloader, valid_dataloader, test_dataloader)
    output = drop(output)
    # Fully connected / softmax layer
    weight_dense = Dense(ALL_TAGS, activation='softmax')  # 2500
    tag_out = weight_dense(output)
    model = Model(inputs=[wordvec_input, lda_input], outputs=[tag_out])

    # Compile model
    model.compile(optimizer='adam',
                  loss=mean_negative_log_probs,
                  metrics=[compute_precision, compute_recall])
    model.summary()

    # Load data
    new_brs, sfs = load_data(path="data_ma.npz",
                             lenth=40,
                             num_words=25000,
                             num_sfs=700,
                             per=0.8)
    # new_brs = new_brs[:200]
    # sfs = sfs[:200]
    sfs_back = sfs[:]
    # Trim the tags, keeping only the first x for each article
    sfs = [sf[:1] for sf in sfs]
    # Load the dictionary file
    with open("dictionary.b", "rb") as f:
        dic = pickle.load(f)
    # Load the LDA model
    lda_model = LdaModel.load("lda_model")
    #for i, data in enumerate(predicted_data[1][0]):
    #    padding = [None for p in range(i * prediction_len)]
    #    ax2.plot(padding + data, label='Prediction')
    ##plt.legend()
    #ax2.set_title(predicted_data[1][1])
    plt.show()


if __name__ == '__main__':
    global_start_time = time.time()
    #epochs = 1
    epochs = 100
    seq_len = 40

    print('> Loading data... ')
    X_train, y_train, X_test, y_test = dataload.load_data('daily_spx.csv', seq_len, True)

    print('> Data Loaded. Compiling...')
    #lstm_model = lstm.build_model([1, seq_len, 100, 1])
    model_layers = [1, 2, 3, 4]
    cnn_models = [
        cnn_batchnorm_lstm.build_model([1, seq_len, 100, 1], x)
        for x in model_layers
    ]
    [model.fit(X_train,
               y_train,
               batch_size=512,
               nb_epoch=epochs,
               validation_split=0.05) for model in cnn_models]
import model

vocabulary_size = 50000
embedding_size = 128
maxlen = 150  # cut texts after this number of words (among top max_features most common words)
batchSize = 32
classNum = 2
learning_rate = 0.001
epochs = 100
ckpt_dir = ""
path = 'C:\\wuwei\\work\\github\\data\\imdb.pkl'

print('Loading data...')
trainData, testData = dataload.load_data(path=path, nb_words=vocabulary_size)
print(len(trainData), 'train sequences')
print(len(testData), 'test sequences')
trainBatches = dataload.get_batches(data=trainData)
testData = dataload.get_batches(data=testData)


def train():
    graph = tf.Graph()
    with graph.as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()
        encoder_inputs = tf.placeholder(shape=(batchSize, None),
                                        dtype=tf.int32,
                                        name='encoder_inputs')
        class_targets = tf.placeholder(shape=(batchSize,),
                                       dtype=tf.int32,
                                       name='class_targets')
image_nc = 1
batch_size = 128
hidden_size = 200
# noise_size = 100
output_size = 1
gen_input_nc = image_nc

# Define what device we are using
print("CUDA Available: ", torch.cuda.is_available())
device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")

# Test adversarial examples on the Drebin training dataset
# mnist_dataset = torchvision.datasets.MNIST('./dataset', train=True, transform=transforms.ToTensor(), download=True)
# train_dataloader = DataLoader(mnist_dataset, batch_size=batch_size, shuffle=False, num_workers=1)
train_loader, feature_vectore_size = dataload.load_data(train=True)

# Load the pretrained target classifier
pretrained_model = "./malware_classifier_net.pth"
target_model = malware_classifier_net(input_size=feature_vectore_size,
                                      hidden_size=hidden_size,
                                      output_size=output_size).to(device)
target_model.load_state_dict(torch.load(pretrained_model))
target_model.eval()

# Load the generator of adversarial examples
pretrained_generator_path = './models/net_malG_epoch_60.pth'
pretrained_G = models.Mal_Generator(input_size=feature_vectore_size,
                                    hidden_size=hidden_size,
                                    output_size=feature_vectore_size).to(device)
# description: several-word description of the purpose of the run
description = 'single_gru'
results_fname = '{}_{}'.format(description, int(datetime.now().timestamp()))

early_stopping = EarlyStopping(patience=20)
tensorboard = TensorBoard(log_dir='tensorboard', write_grads=True)

for seq_len in seq_lens:
    print('Seq len: {}'.format(seq_len))

    print('> Loading data... ')
    X_train, y_train, X_test, y_test = dataload.load_data(
        'daily_spx.csv',
        seq_len,
        normalise_window=True,
        smoothing=False,
        smoothing_window_length=5,
        smoothing_polyorder=3,
        reshape=True)
    #X_train, y_train, X_test, y_test = dataload.load_sin_data(seq_len, normalise_window=True)

    print('> Data Loaded. Compiling...')

    # Grid search parameters
    kernel_sizes = [5, 9]
    step_sizes = [2]
    single_branch = True
    stride = [3]
    lstm_units = [200, 400]
    branches = [3]

    model = False
    model, encoder = make_DNN_model(feat_size=params['frame_length'] * 3,
                                    latent_dim=latent_dim)
    checkpointer = ModelCheckpoint(
        filepath=savedir + 'mlp_AE_uad_' + str(use_ancillarydata) +
        params_append_str + '_ld_' + str(latent_dim) + '.h5',
        verbose=1,
        save_best_only=True)
    early_stopping = EarlyStopping(monitor='val_loss', patience=5)
    #model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    lr = 0.001
    sgd = SGD(lr=lr, decay=0, momentum=0.9, nesterov=True)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    batch_size = 500
    epochs = 200
    # Autoencoder: the inputs are also the reconstruction targets
    model.fit(train_X, train_X,
              validation_split=0.2,
              batch_size=batch_size,
              epochs=epochs,
              shuffle=True,
              verbose=1,
              callbacks=[checkpointer, early_stopping])
    model.load_weights(savedir + 'mlp_AE_uad_' + str(use_ancillarydata) +
                       params_append_str + '_ld_' + str(latent_dim) + '.h5')
    encoder.save(savedir + 'mlp_encoder_uad_' + str(use_ancillarydata) +
                 params_append_str + '_ld_' + str(latent_dim) + '.h5')
    '''
    encoder = load_model(savedir + 'mlp_encoder_uad_' + str(use_ancillarydata) + '_ld_' + str(latent_dim) + '.h5')
    '''
    if saveAEFeats:
        save_feats_path = '/export/b19/mpgill/BeatPD/AE_30ft_high_pass/'
        for idx in df_train_label.index:
            print(idx)
            temp_X = load_data(df_train_label, idx, cleanParams)
            temp_feats = encoder.predict(temp_X)
            name = df_train_label["measurement_id"][idx]
            sio.savemat(save_feats_path + name + '.mat', {'feat': temp_feats})
import tensorflow as tf

from dataload import load_data

DATA_DIR = './data/'
data = load_data(DATA_DIR)

# Placeholders for the input data, plus the weights
X = tf.placeholder(tf.float32, [None, 1024])  # 1024 input features
Y = tf.placeholder(tf.float32, [None, 22])  # 22 classes

W1 = tf.Variable(tf.random_normal([1024, 256], stddev=0.01))
L1 = tf.nn.relu(tf.matmul(X, W1))
W2 = tf.Variable(tf.random_normal([256, 256], stddev=0.01))
L2 = tf.nn.relu(tf.matmul(L1, W2))
W3 = tf.Variable(tf.random_normal([256, 22], stddev=0.01))
model = tf.matmul(L2, W3)

cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

batch_size = 40
total_batch = int(data.train.num_examples / batch_size)

for epoch in range(100):
    total_cost = 0
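    # A hedged continuation sketch (not from the source): it ASSUMES
    # `data.train` exposes an MNIST-style next_batch(batch_size), which the
    # use of data.train.num_examples above suggests; adjust to the actual
    # dataload API if it differs.
    for i in range(total_batch):
        batch_xs, batch_ys = data.train.next_batch(batch_size)
        _, cost_val = sess.run([optimizer, cost],
                               feed_dict={X: batch_xs, Y: batch_ys})
        total_cost += cost_val
    print('Epoch: {:04d}, Avg. cost = {:.3f}'.format(
        epoch + 1, total_cost / total_batch))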
import cnn_model_words
import dataload
import keras
import random
import numpy as np
from keras import backend as K

if __name__ == '__main__':
    BATCH_SIZE = 100
    EPOCHS = 100
    NUM_WORDS = 5000
    HEIGHT = 32
    WIDTH = 100

    # Load data
    word_data = dataload.load_data(NUM_WORDS)
    print('Loaded data')

    # Split into train and test sets
    train_images = []
    train_labels = []
    test_images = []
    test_labels = []
    random.seed(100)
    for k, v in word_data.items():
        # Shuffle data
        pixels = [p['pixel_array'] for p in v['points']]
        random.shuffle(pixels)
        del pixels[int(len(pixels) * 0.5):]  # Trims list to save memory
def index():
    if ac.len() < 1:
        load_data(redis_server, redis_port, redis_password)
    return render_template('search.html')
def main(_):
    # Load parameters from the yaml file specified in argv
    paramfilename = sys.argv[1]
    modelname, param = load_config(paramfilename)
    n_classes = cfg.N_CLASSES

    with tf.Session() as sess:
        tf.logging.set_verbosity(tf.logging.INFO)

        # Placeholders for signal preprocessing inputs
        X_data = tf.placeholder(tf.float32, [None, cfg.SAMRATE], name='X_data')
        noise_factor = tf.placeholder(tf.float32, shape=(), name='noise_factor')
        noise_frac = tf.placeholder(tf.float32, shape=(), name='noise_frac')

        # Define the audio features
        x_mfcc, x_mel, x_zcr, x_rmse = signalProcessBatch(
            X_data,
            noise_factor=noise_factor,
            noise_frac=noise_frac,
            window=param['window'],
            maxamps=cfg.MAXAMPS,
            sr=cfg.SAMRATE,
            num_mel_bins=param['num_mel_bins'],
            num_mfccs=param['num_mfccs'])

        # Placeholder variables output (1-hot vectors of size n_classes)
        y_true = tf.placeholder(tf.float32, shape=[None, n_classes], name='y_true')
        y_true_class = tf.argmax(y_true, 1, name='y_true_class')

        # Dropout keep probability and training flag
        dropout_prob = tf.placeholder(tf.float32, shape=(), name='dropout_prob')
        is_training = tf.placeholder(tf.bool, name="is_training")

        # Prediction from the model
        model = buildModel(modelname)
        y_pred = model(x_mel, x_mfcc, x_zcr, x_rmse,
                       dropout_prob=dropout_prob, is_training=is_training)

        # Cross-entropy loss function with softmax, then take the mean
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y_true))
        tf.summary.scalar('loss', loss)

        # Train and backprop gradients function
        optimizer = tf.train.AdamOptimizer(
            learning_rate=param['learning_rate']).minimize(loss)

        # Evaluation and accuracy
        y_pred_class = tf.argmax(y_pred, 1, name='y_pred_class')
        correct_prediction = tf.equal(y_pred_class, y_true_class)
        confusion_matrix = tf.confusion_matrix(y_true_class, y_pred_class,
                                               num_classes=n_classes)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

        # Merge all summaries
        merged = tf.summary.merge_all()

        # Saver for checkpoints
        saver = tf.train.Saver(tf.global_variables())

        # Set path to summary logs and checkpoints
        now = datetime.now()
        logs_path = os.path.join(cfg.OUT_DIR, now.strftime("%Y%m%d-%H%M%S"),
                                 'summaries')

        # Create summary writers
        train_writer = tf.summary.FileWriter(os.path.join(logs_path, 'train'),
                                             graph=sess.graph)
        test_writer = tf.summary.FileWriter(os.path.join(logs_path, 'test'),
                                            graph=sess.graph)

        # Initialize variables
        tf.global_variables_initializer().run()

        # Main training section
        start_time = time.time()
        msg = "\n====================\nStarting training...\n===================="
        tf.logging.info(msg)

        # Load the audio file info dataframe
        df = load_data(cfg.DATA_DIR)

        # Log
        msg = "\nModel: {}\nParam File: {}\nIterations: {}"
        tf.logging.info(
            msg.format(modelname, paramfilename, param['num_iterations']))
        tf.logging.info(" Begin iterations...")
        for i in xrange(param['num_iterations']):
            # Unknown training weight adjustment for the first 5000 cycles
            if i < 5000:
                w = (1 + param['unknown_weight_scaler'] * i) * param['unknown_weight']
            else:
                w = param['unknown_weight']

            # Get the training batch
            X_train, y_true_batch = load_batch(
                df, cfg.DATA_DIR,
                batch_size=param['batch_size'],
                silence_size=param['silence_size'],
                label='train', random=True, seed=None,
                w=w, samples=cfg.SAMRATE)

            # Preprocess the training batch
            x_mfcc_batch, x_mel_batch, x_zcr_batch, x_rmse_batch = sess.run(
                [x_mfcc, x_mel, x_zcr, x_rmse],
                feed_dict={
                    X_data: X_train,
                    noise_factor: param['noise_factor_value'],
                    noise_frac: param['noise_frac_value']
                })

            # Training optimization
            sess.run(optimizer,
                     feed_dict={
                         x_mel: x_mel_batch,
                         x_mfcc: x_mfcc_batch,
                         x_zcr: x_zcr_batch,
                         x_rmse: x_rmse_batch,
                         y_true: y_true_batch,
                         dropout_prob: param['dropout_prob_value'],
                         is_training: True
                     })

            # Checkpoint save and validation step
            if ((i + 1) % param['checkpoint_step'] == 0) or (i == param['num_iterations'] - 1):
                # Checkpoint
                checkpoint_path = os.path.join(
                    logs_path, "{}-{}.ckpt".format(modelname, paramfilename[:-4]))
                msg = " Saving checkpoint to: {}-{}"
                tf.logging.info(msg.format(checkpoint_path, i + 1))
                saver.save(sess, checkpoint_path, global_step=i + 1)

                # Load the validation batches
                val_batch_size = 100
                total_val_accuracy = 0
                total_conf_matrix = None
                val_set_size = 6700
                for j in xrange(0, val_set_size, val_batch_size - param['silence_size']):
                    X_val, y_true_val = load_batch(
                        df, cfg.DATA_DIR,
                        batch_size=val_batch_size,
                        silence_size=param['silence_size'],
                        label='val', random=False, seed=j,
                        w=1.0, samples=cfg.SAMRATE)

                    # Preprocess the validation batch
                    x_mfcc_val, x_mel_val, x_zcr_val, x_rmse_val = sess.run(
                        [x_mfcc, x_mel, x_zcr, x_rmse],
                        feed_dict={
                            X_data: X_val,
                            noise_factor: 0.0,
                            noise_frac: 0.0
                        })

                    # Validation summary
                    val_summary, loss_val, acc_val, conf_matrix = sess.run(
                        [merged, loss, accuracy, confusion_matrix],
                        feed_dict={
                            x_mel: x_mel_val,
                            x_mfcc: x_mfcc_val,
                            x_zcr: x_zcr_val,
                            x_rmse: x_rmse_val,
                            y_true: y_true_val,
                            dropout_prob: 1.0,
                            is_training: False
                        })
                    total_val_accuracy += (acc_val * val_batch_size) / val_set_size
                    if total_conf_matrix is None:
                        total_conf_matrix = conf_matrix
                    else:
                        total_conf_matrix += conf_matrix

                msg = " Confusion Matrix:\n {}"
                tf.logging.info(msg.format(total_conf_matrix))
                msg = " VALIDATION ACC: {:6f}, (N = {})"
                tf.logging.info(msg.format(total_val_accuracy, val_set_size))

            # Display step
            if (i == 0) or ((i + 1) % param['display_step'] == 0) or (i == param['num_iterations'] - 1):
                # Training summary, loss and accuracy
                train_summary, loss_train, acc_train = sess.run(
                    [merged, loss, accuracy],
                    feed_dict={
                        x_mel: x_mel_batch,
                        x_mfcc: x_mfcc_batch,
                        x_zcr: x_zcr_batch,
                        x_rmse: x_rmse_batch,
                        y_true: y_true_batch,
                        dropout_prob: 1.0,
                        is_training: False
                    })
                train_writer.add_summary(train_summary, i + 1)

                # Display message
                msg = " OPTIMIZE STEP: {:6d}, LOSS, {:.6f}, ACC: {:.6f}"
                tf.logging.info(msg.format(i + 1, loss_train, acc_train))

                # Check if loss is below the minimum
                if loss_train < param['min_loss']:
                    msg = " Min loss achieved: {}"
                    tf.logging.info(msg.format(loss_train))
                    break

        # Load the testing batches
        test_batch_size = 100
        total_test_accuracy = 0
        total_conf_matrix = None
        test_set_size = 6700
        for j in xrange(0, test_set_size, test_batch_size - param['silence_size']):
            X_test, y_true_test = load_batch(
                df, cfg.DATA_DIR,
                batch_size=test_batch_size,
                silence_size=param['silence_size'],
                label='test', random=False, seed=j,
                w=1.0, samples=cfg.SAMRATE)

            # Preprocess the testing batch
            x_mfcc_test, x_mel_test, x_zcr_test, x_rmse_test = sess.run(
                [x_mfcc, x_mel, x_zcr, x_rmse],
                feed_dict={
                    X_data: X_test,
                    noise_factor: 0.0,
                    noise_frac: 0.0
                })

            # Testing summary
            test_summary, loss_test, acc_test, conf_matrix = sess.run(
                [merged, loss, accuracy, confusion_matrix],
                feed_dict={
                    x_mel: x_mel_test,
                    x_mfcc: x_mfcc_test,
                    x_zcr: x_zcr_test,
                    x_rmse: x_rmse_test,
                    y_true: y_true_test,
                    dropout_prob: 1.0,
                    is_training: False
                })
            test_writer.add_summary(test_summary, i + 1)
            total_test_accuracy += (acc_test * test_batch_size) / test_set_size
            if total_conf_matrix is None:
                total_conf_matrix = conf_matrix
            else:
                total_conf_matrix += conf_matrix

        msg = " Confusion Matrix:\n {}"
        tf.logging.info(msg.format(total_conf_matrix))
        msg = " TESTING ACC: {:6f}, (N = {})"
        tf.logging.info(msg.format(total_test_accuracy, test_set_size))

        # End time
        end_time = time.time()
        msg = " Time usage: {}"
        tf.logging.info(
            msg.format(timedelta(seconds=int(round(end_time - start_time)))))
args.device = torch.device('cuda')
args.kernel_num = 100
args.kernel_sizes = '3,4,5'
args.kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]
args.dropout = 0.1

# Preprocess
if not os.path.exists(TEST_PRE_PATH):
    logger.info('Preprocessing begin...')
    preprocess_write(TEST_PATH, TEST_PRE_PATH)
else:
    logger.info('No need to preprocess!')

# Load data
logger.info('Loading data begin...')
text_field, label_field, train_data, train_iter, dev_data, dev_iter = load_data(
    amazon_train, amazon_test, args)
text_field.build_vocab(train_data, dev_data, min_freq=10)
label_field.build_vocab(train_data)
logger.info('Length of vocab is: ' + str(len(text_field.vocab)))
args.vocab_size = len(text_field.vocab)
args.word_2_index = text_field.vocab.stoi  # dict mapping {word: index}
args.index_2_word = text_field.vocab.itos  # list of words, index -> word

# Initial word embedding
logger.info('Getting pre-trained word embedding ...')
args.pretrained_weight = get_pretrained_word_embed(small_glove_path, args, text_field)

# Build model and train
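# A hedged, self-contained sketch of how pre-trained weights like
# args.pretrained_weight are typically injected into a model's embedding
# layer. The helper name is an illustrative assumption, not this project's API.
import torch
import torch.nn as nn


def build_embedding(pretrained_weight, freeze=False):
    # pretrained_weight: (vocab_size, embed_dim) array aligned with vocab.stoi
    weight = torch.tensor(pretrained_weight, dtype=torch.float)
    return nn.Embedding.from_pretrained(weight, freeze=freeze)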
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from dataload import load_data
from sklearn import preprocessing
from IPython import embed
import time

h = .02  # step size in the mesh

X_train, y_train, X_test, y_test = load_data()
# test_label = np.where(y_test == 1)[1]
# min_max_scaler = preprocessing.MinMaxScaler()
# X_train_minmax = min_max_scaler.fit_transform(X_train)
# X_test_minmax = min_max_scaler.fit_transform(X_test)

# Standardize features using statistics fit on the training set only
scaler = preprocessing.StandardScaler().fit(X_train)
trainX = scaler.transform(X_train)
testX = scaler.transform(X_test)

names = [
    "Nearest Neighbors", "Decision Tree", "Neural Net", "AdaBoost",
    "Naive Bayes"
]
classifiers = [
def main():
    # Original 4 pipelines
    model = load_model('single_cnn_gru_1542783564/seq_len_50/model-3.h5')
    # 2 new pipelines for bidirectional GRU
    #model = load_model('../final_sem_code/single_cnn_bidir_gru_1548982892/seq_len_50/model-3.h5')
    seq_len = 50
    predict_len = 7
    date_ranges = [(datetime.date(2016, 1, 1), datetime.date(2016, 6, 1)),
                   (datetime.date(2017, 1, 1), datetime.date(2017, 6, 1)),
                   (datetime.date(2018, 1, 1), datetime.date(2018, 6, 1))]
    '''
    date_ranges = [(datetime.date(2015,1,1),datetime.date(2015,12,31)),
                   (datetime.date(2016,1,1),datetime.date(2016,12,31)),
                   (datetime.date(2017,1,1),datetime.date(2017,12,31))]
    '''
    #test_data = [dataload.load_data('daily_spx.csv', seq_len, normalise_window=True, smoothing=False, date_range=date_range, train=False) for date_range in date_ranges]
    test_data = [
        dataload.load_data('../2018_data/Yahoo_2000_to_2018.csv',
                           seq_len,
                           normalise_window=True,
                           smoothing=False,
                           date_range=date_range,
                           train=False) for date_range in date_ranges
    ]
    predictions = [
        dataload.predict_sequences_multiple(model, test[0], seq_len,
                                            predict_len) for test in test_data
    ]
    scores = [model.evaluate(test[0], test[1], verbose=0) for test in test_data]
    # Denormalize predicted sequences
    for prediction_index in range(len(predictions)):
        for sequence_index in range(len(predictions[prediction_index])):
            predictions[prediction_index][sequence_index] = dataload.denormalize_sequence(
                test_data[prediction_index][2][sequence_index * 7],  # 7 == predict_len
                predictions[prediction_index][sequence_index])
    # Denormalize ground-truth values
    for test_data_index in range(len(test_data)):
        for y_index in range(len(test_data[test_data_index][1])):
            test_data[test_data_index][1][y_index] = dataload.denormalize_point(
                test_data[test_data_index][2][y_index],
                test_data[test_data_index][1][y_index])
    model_plot = [(predictions[0], '2016'), (predictions[1], '2017'),
                  (predictions[2], '2018')]
    #model_plot = [(predictions[0], 'Mar 2002 to Aug 2002'), (predictions[1], 'Mar 2015 to Aug 2015'), (predictions[2], 'Jan 2016 to Apr 2017')]
    #model_plot = [(predictions[0], 'Bullish'), (predictions[1], 'Bearish'), (predictions[2], 'Neutral')]
    results_fname = 'test_single_cnn_bidir_gru_{}'.format(
        int(datetime.datetime.now().timestamp()))
    #os.makedirs(results_fname)
    plot_results_multiple(model_plot, [t[1] for t in test_data],
                          predict_len,
                          fig_path='plots_test/plots.pdf')
    with open('plots_test' + "/score.txt", "w") as fout:
        for score in scores:
            pprint.pprint(score, fout)
from dnn2 import DNN2
import dataload

_, test_data = dataload.load_data(
    'C:/Users/student/machineLearning/kerasTest/iris.csv', 4, 0.6)

model = DNN2(3, 10, 3)
# number of input nodes  = train_data.x_data.shape[1] => 3
# number of output nodes = train_data.y_data.shape[1] => 3
# number of hidden nodes = 10
model.load_weights('kerasTest/mnist_mlp_model.h5')

loss, accuracy = model.evaluate(test_data.x_data, test_data.y_data,
                                batch_size=100)
print(loss, accuracy)

# print('Predictions:', model.predict(test_data.x_data).flatten())
# loss, accuracy = model.evaluate(test_data.x_data, test_data.y_data, batch_size=100)
# print(loss, accuracy)
import tensorflow as tf

from dataload import load_data
from classify import Classifier

# Prepare the data
train_data, test_data = load_data(
    'C:/Users/student/machineLearning/Tensorflow/iris.csv', 4, 0.6)

# Choose the layer sizes
num_input = train_data.x_data.shape[1]   # number of input nodes
num_output = train_data.y_data.shape[1]  # number of output nodes
num_hidden = 10
num_hidden2 = 20

# Create the classifier
iris = Classifier(num_input, num_output, num_hidden, num_hidden2, 0.01)

# Train and evaluate
iris.train(train_data, 1000, 100)
iris.test(test_data)

# Query
answer = iris.query([[5.8, 4, 1.2, 0.2]])
print(answer)

iris.close()