def build_model(file_name):
    dict_data, list_companies, dependent_variable, rest_companies, dict_final = collect_data(file_name)
    stationary, moments, log_normal = collect_statistics(dict_data)
    if False in stationary.values() and False in log_normal.values():
        print("choose a different model to build; deal with the non-stationary series first")
    else:
        # create the model and return the information
        model_raw = BuildModel(dict_data, list_companies, dependent_variable,
                               rest_companies, dict_final)
        cor_vec = model_raw.correlation_vector(dict_data)
        cut_off = model_raw.correlational_cutoff(cor_vec)
        built_model = model_raw.build_the_model(cut_off)  # renamed from build_model to avoid shadowing the function
        print(built_model)
        return built_model
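# Hedged usage sketch (not from the original source): the CSV path below is
# borrowed from build_predictions(); build_model only returns a fitted model
# when the stationarity/log-normality checks pass, so guard against None.
if __name__ == '__main__':
    fitted = build_model("../data/LearningSet.csv")
    if fitted is None:
        print("model was not built; see the stationarity warning above")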
# imports assumed by this module (the module header is not shown in the original);
# DataFormatting and BuildModel are project-local classes imported elsewhere
import numpy
import statsmodels.api as sm
import sklearn.metrics as sk
import matplotlib.pyplot as plt


def build_predictions():
    # get the testing data frame and its dependent variable
    test_raw = DataFormatting("../data/TestingSet.csv")
    test_dict = test_raw.keep_dict()
    comp_test = test_raw.getAllCompanies()
    dependent_variable_tmp, rest, dict_final_tmp = test_raw.extract_dependent()

    # get the training data
    build_raw = DataFormatting("../data/LearningSet.csv")
    build_dict = build_raw.keep_dict()
    comp_model = build_raw.getAllCompanies()
    dependent_variable_mod, rest_mod, dict_final_mod = build_raw.extract_dependent()

    real_model = BuildModel(build_dict, comp_model, dependent_variable_mod,
                            rest_mod, dict_final_mod)
    y_mod = build_dict[list(dependent_variable_mod.keys())[0]]
    test_model = BuildModel(test_dict, comp_test, dependent_variable_tmp,
                            rest, dict_final_tmp)
    y = test_dict[list(dependent_variable_tmp.keys())[0]]

    # manually chosen list of companies for the model
    # original list
    model_list = ['STMElectro', 'Olympus', 'St Jude', 'Lenovo', 'MicronTech', 'Google']
    # new list
    #model_list = ['STMElectro', 'Olympus', 'St Jude', 'Lenovo', 'MicronTech', 'Cardinal']

    # get the regressors for both sets
    test_data = test_model.get_data_for_comparison(model_list, test_dict)
    build_data = real_model.get_data_for_comparison(model_list, build_dict)

    model = sm.GLS(y_mod, build_data)
    fitted = model.fit()  # fit once and reuse; the original called fit() twice
    model_out = fitted.summary()
    params = fitted.params
    predict = model.predict(params, test_data)
    diff = y - predict
    mean_pred = numpy.mean(predict)
    std_pred = numpy.std(predict)
    mae = sk.mean_absolute_error(y, predict)
    rmse = numpy.sqrt(sk.mean_squared_error(y, predict))

    # plot predictions against real prices
    plt.figure()
    plt.title("Predictions vs Real Prices")
    # the original legend call passed the lists returned by plot() directly,
    # which mislabels the entries; unpack the lines and use labels instead
    rl, = plt.plot(y, color="grey", linewidth=2.0, label="real prices")
    pr, = plt.plot(predict, color="blue", linewidth=1.0, label="predicted")
    plt.legend()
    plt.savefig("../PythonPredictions.png")
    plt.clf()

    print("the mean of y is: " + str(numpy.mean(y)))
    print("the std of y is: " + str(numpy.std(y)))
    print("the mean of the prediction is: " + str(mean_pred))
    print("the standard deviation of the prediction is: " + str(std_pred))
    print("the mean absolute error of the prediction is: " + str(mae))
    print("the root mean squared error is: " + str(rmse))
    print("the model is: " + str(model_list))
    return model, predict, diff, mean_pred, std_pred, mae, rmse
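# A minimal, self-contained sketch of the statsmodels GLS fit/predict pattern
# used by build_predictions() above. The synthetic data is illustrative only;
# the point is to fit once and reuse the results object instead of refitting.
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
X_fit = sm.add_constant(rng.normal(size=(100, 3)))  # training regressors
true_beta = np.array([1.0, 0.5, -0.2, 2.0])
y_fit = X_fit @ true_beta + rng.normal(size=100)
X_new = sm.add_constant(rng.normal(size=(20, 3)))   # held-out regressors

results = sm.GLS(y_fit, X_fit).fit()  # summary(), params and predict() all reuse this
y_hat = results.predict(X_new)
print(results.params)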
def testRelation():
    # Map sentences into the state space; used as the basis for judging text similarity.
    with tf.Session() as sess:
        tg = DataPreparation.TupleGenerator()
        ohEncoder = one_hot.OneHotEncoder()
        generator = tg.tuple_gen('content_law_labeled.txt')
        input_data = tf.placeholder(tf.int32, shape=[1, None])
        length_data = tf.placeholder(tf.int32, shape=[1])
        ops = BuildModel.model(input_data, None, length=length_data)
        saver = tf.train.Saver(tf.global_variables())
        checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        lstmcells = []
        for i in range(3):
            lstmcells.append({'c': [], 'h': []})
        if checkpoint:
            saver.restore(sess, checkpoint)
        ALL_NUM = 1000
        for count in range(ALL_NUM):
            content = next(generator)
            enter_data = ohEncoder.one_hot_single(content, True)
            state, predict = sess.run([ops['last_state'], ops['prediction']],
                                      feed_dict={
                                          input_data: [enter_data],
                                          length_data: [len(enter_data)]
                                      })
            for c in range(3):
                lstmcells[c]['c'].append(np.array(state[c][0]))
                # the original appended state[c][0] here as well; an LSTMStateTuple
                # is (c, h), so the hidden state is state[c][1]
                lstmcells[c]['h'].append(np.array(state[c][1]))
            if count % 10 == 0:
                print("[INFO] Getting the %d-th vector..." % count)
        vec = np.array(lstmcells[1]['c'])
        lddata, recon = PCA.PCA(np.mat(vec), 2)
        points, labels = tg.generateLabel(lddata)
        DrawPlot.drawScatter(points, labels)
        # use a context manager so the file is closed (the original never closed it)
        with open('result.txt', 'w', encoding='utf-8') as outfile:
            for i in range(ALL_NUM):
                outfile.write('%f\t%f\n' % (lddata[i, 0], lddata[i, 1]))
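# PCA.PCA above is a project-local helper; a minimal numpy sketch of what it
# plausibly computes, assuming it returns the k-dimensional projection and the
# reconstruction in the original space (the function name and signature here
# are illustrative, not the project's own).
import numpy as np

def pca_project(data, k):
    mean = data.mean(axis=0)
    centered = data - mean
    # eigen-decomposition of the covariance matrix; pick the k largest components
    eig_vals, eig_vecs = np.linalg.eigh(np.cov(centered, rowvar=False))
    top = eig_vecs[:, np.argsort(eig_vals)[::-1][:k]]
    low_dim = centered @ top             # projection onto the top-k subspace
    recon = low_dim @ top.T + mean       # reconstruction back in the input space
    return low_dim, recon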
def run_generate():
    with tf.Session() as sess:
        input_data = tf.placeholder(dtype=tf.int32, shape=[1, None])
        ops = BuildModel.model(input_data, None, [1])
        saver = tf.train.Saver(tf.global_variables())
        checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        if checkpoint:
            saver.restore(sess, checkpoint)
        ohEncoder = one_hot.OneHotEncoder()
        first = ohEncoder.get_code('1')
        predict, state = sess.run([ops['prediction'], ops['last_state']],
                                  feed_dict={input_data: np.array([[first]])})
        word = ohEncoder.get_word(np.argmax(predict))
        while word != 'EOD':  # generate until the end-of-document token
            sys.stdout.write(word)
            predict, state = sess.run([ops['prediction'], ops['last_state']],
                                      feed_dict={
                                          input_data: np.array([[ohEncoder.get_code(word)]]),
                                          ops['init_state']: state
                                      })
            word = ohEncoder.get_word(np.argmax(predict))
        print('')
def TrainAndTest(epochs, iterations, model, resultsCsv, timeStamp=None):
    import BuildModel
    import TestModel
    print('iterations:' + str(iterations) + '\tEpochs:' + str(epochs))
    now = datetime.now()
    if timeStamp is None:
        timeStamp = now.strftime("%b%d_%I%M%S%p")
    modelFile = ''.join(['newModel_', timeStamp])
    save_root = 'model/' + modelFile + '/'
    modelFile = modelFile + '.h5'
    writeMode = 'w'
    newModel = True
    if model is not None:
        save_root = model.split('/')
        if len(save_root) != 1:
            modelFile = save_root[-1]
            save_root = '/'.join(save_root[:-1]) + '/'
        else:
            save_root = ''
            modelFile = model
        newModel = False
        writeMode = 'a'
    resultsFile = 'results.csv'
    if resultsCsv is not None:
        resultsFile = resultsCsv
    if not os.path.isdir(save_root) and newModel:
        os.makedirs(save_root)
    with open(save_root + resultsFile, writeMode, newline='') as results_file, \
         open(save_root + "PerEpochMetrics.csv", writeMode, newline='') as PerE_file:
        results_writer = csv.writer(results_file)
        PerE_writer = csv.writer(PerE_file)
        if writeMode == 'w':
            results_writer.writerow(['Epochs', 'Cat Accuracy', 'Dog Accuracy',
                                     'Total Accuracy', 'Average Time'])
            PerE_writer.writerow(['Epochs', 'val_loss', 'val_acc', 'loss', 'acc'])
            results_file.flush()
            PerE_file.flush()
        # Build and test one model per iteration; the first iteration creates a
        # new model when requested, later ones resume from the saved file.
        for i in range(0, iterations):
            print('Iteration', i + 1)
            if newModel and i == 0:
                hist, modelFile = BuildModel.BuildModel(epochs, None, timeStamp=timeStamp)
            else:
                hist, modelFile = BuildModel.BuildModel(epochs, save_root + modelFile)
            cat, dog, acc, time = TestModel.TestModel(True, save_root + modelFile)
            results_writer.writerow([i * epochs, cat, dog, acc, time])
            # write the per-epoch history
            val_loss = hist.history['val_loss']
            val_acc = hist.history['val_acc']
            loss = hist.history['loss']
            acc = hist.history['acc']
            for epoch in hist.epoch:
                PerE_writer.writerow([i * epochs + epoch, val_loss[epoch],
                                      val_acc[epoch], loss[epoch], acc[epoch]])
            PerE_file.flush()
            results_file.flush()
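# Hedged usage sketch: all argument values are illustrative. Passing model=None
# builds a fresh model under model/newModel_<timestamp>/; passing a path to an
# existing .h5 file resumes training and appends to its CSV logs.
if __name__ == '__main__':
    TrainAndTest(epochs=5, iterations=3, model=None, resultsCsv=None)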
import TrainModel as TM
import DefineParam as DP
import LoadData as LD     # assumed module name: the original used LD without an import
import BuildModel as BM   # assumed module name: the original used BM without an import
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "1,2,3"

# get parameters
pixel, batchSize, nPhase, nTrainData, trainScale, learningRate, nEpoch, nFrame, \
    ncpkt, trainFile, testFile, maskFile, saveDir, modelDir, name = DP.get_param()

# load data
print('-------------------------------------\nLoading Data...\n-------------------------------------\n')
trainLabel, trainPhi = LD.load_train_data(mat73=True)  # set mat73=True when the training data file is large and stored in MAT 7.3 format
#trainLabel, trainPhi = LD.load_train_data(mat73=False)

# build model
print('-------------------------------------\nBuilding Model...\n-------------------------------------\n')
sess, saver, Xinput, Xoutput, costAll, optmAll, Yinput, prediction = BM.build_model(trainPhi)

# train model
print('-------------------------------------\nTraining Model...\n-------------------------------------\n')
TM.train_model(sess, saver, costAll, optmAll, Yinput, prediction, trainLabel, trainPhi, Xinput, Xoutput)
print('-------------------------------------\nTraining Accomplished.\n-------------------------------------\n')
import DefineParam as DP
import LoadData as LD            # assumed module names: the original used DP, LD,
import BuildModel as BM          # BM and RI without showing their imports
import ReconstructImage as RI
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '2'

# get parameters
pixel, batchSize, nPhase, nTrainData, trainScale, learningRate, nEpoch, nFrame, \
    ncpkt, trainFile, testFile, maskFile, saveDir, modelDir, name = DP.get_param()

# load data
print('-------------------------------------\nLoading Data...\n-------------------------------------\n')
testLabel, testPhi = LD.load_test_data(mat73=False)

# build and restore model
print('-------------------------------------\nBuilding and Restoring Model...\n-------------------------------------\n')
sess, saver, Xinput, Xoutput, Yinput, prediction = BM.build_model(testPhi, restore=True)

# reconstruct image
print('-------------------------------------\nReconstructing Image...\n-------------------------------------\n')
RI.reconstruct_image(sess, Yinput, prediction, Xinput, Xoutput, testLabel, testPhi)
print('-------------------------------------\nReconstruction Accomplished.\n-------------------------------------\n')
model_CNN = []
History = []
score = []
X_train, y_train, X_test, y_test, number_of_classes = Data_load.Load_data()

print("DNN ")
filepath = "weights_DNN.hdf5"
checkpoint = ModelCheckpoint(filepath,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True,
                             mode='max')
callbacks_list = [checkpoint]
model_DNN, model_tmp = BuildModel.buildModel_DNN_Tex(X_train.shape[1],
                                                     number_of_classes,
                                                     sparse_categorical)
h = model_DNN.fit(X_train, y_train,
                  validation_data=(X_test, y_test),
                  epochs=n_epochs,
                  batch_size=batch_size,
                  callbacks=callbacks_list,
                  verbose=2)
History.append(h)
model_tmp.load_weights("weights_DNN.hdf5")  # restore the best checkpointed weights
if sparse_categorical == 0:
    model_tmp.compile(loss='sparse_categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
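# Hedged continuation sketch: the score list is declared above but unused in
# this fragment; evaluating the restored best weights is a plausible next step,
# assuming model_tmp has been compiled as in the branch above.
score.append(model_tmp.evaluate(X_test, y_test, batch_size=batch_size, verbose=0))
print("best-checkpoint loss and accuracy:", score[-1])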
1: Tokenizer that is using GLOVE
1: loadData that is using counting words or tf-idf
'''
#X_train, y_train, X_test, y_test, content_L2_Train, L2_Train, content_L2_Test, L2_Test, number_of_classes_L2, word_index, embeddings_index, number_of_classes_L1 = \
#    Data_helper.loadData_Tokenizer(MAX_NB_WORDS, MAX_SEQUENCE_LENGTH)
X_train_DNN, y_train_DNN, X_test_DNN, y_test_DNN, content_L2_Train_DNN, L2_Train_DNN, \
    content_L2_Test_DNN, L2_Test_DNN, number_of_classes_L2_DNN, number_of_classes_L1 = Data_helper.loadData()
print("Loading Data is Done")

####################### DNN Level 1 ########################
if L1_model == 0:
    print('Create model of DNN')
    model = BuildModel.buildModel_DNN(X_train_DNN.shape[1], number_of_classes_L1, 8, 64, dropout=0.25)
    model.fit(X_train_DNN, y_train_DNN[:, 0],
              validation_data=(X_test_DNN, y_test_DNN[:, 0]),
              epochs=epochs,
              verbose=2,
              batch_size=batch_size_L1)

HDLTex = []  # the Level 2 models are a list of deep structures
###################### DNN Level 2 ################################
if L2_model == 0:
    for i in range(0, number_of_classes_L1):
        print('Create Sub model of', i)
        HDLTex.append(Sequential())
import time
import functools
import logging
import tensorflow as tf  # added: tf.Variable and tf.clip_by_value are used below
import IPython.display as display
import ImagePreprocessing as IP
import DefineRepresentations as DR
import BuildModel as BM

# Initializing objects
CONTENT_IMAGE = IP.content_img
STYLE_IMAGE = IP.style_img
LAYER_REP = DR.RepresentationLayers()
CONTENT_LAYERS, STYLE_LAYERS = LAYER_REP()
NUM_CONTENT_LAYERS = LAYER_REP.get_num_content_layers()
NUM_STYLE_LAYERS = LAYER_REP.get_num_style_layers()
EXTRACTOR = BM.StyleContentExtraction(STYLE_LAYERS, CONTENT_LAYERS, STYLE_IMAGE,
                                      CONTENT_IMAGE, True, True)

# logging configuration
logging.basicConfig(format="%(message)s", level=logging.INFO)

"""Running Gradient Descent"""

# setting style and content parameter targets
style_targets = EXTRACTOR(STYLE_IMAGE)['style']
content_targets = EXTRACTOR(CONTENT_IMAGE)['content']

# defining a tf.Variable to hold and optimize the image
image = tf.Variable(CONTENT_IMAGE)

# function to keep pixel values between 0 and 1
def regularize(image):
    return tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
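# A hedged sketch of the gradient-descent step that typically follows this
# setup (it continues the module above, reusing EXTRACTOR, the targets and
# regularize). The loss weights and the dict-of-layers structure assumed for
# EXTRACTOR's output are illustrative, not taken from the original project.
optimizer = tf.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)

@tf.function
def train_step(image):
    with tf.GradientTape() as tape:
        outputs = EXTRACTOR(image)
        # mean-squared distance to the style/content targets, averaged per layer
        style_loss = tf.add_n([
            tf.reduce_mean((outputs['style'][name] - style_targets[name]) ** 2)
            for name in outputs['style']
        ]) / NUM_STYLE_LAYERS
        content_loss = tf.add_n([
            tf.reduce_mean((outputs['content'][name] - content_targets[name]) ** 2)
            for name in outputs['content']
        ]) / NUM_CONTENT_LAYERS
        loss = 1e-2 * style_loss + 1e4 * content_loss
    grad = tape.gradient(loss, image)
    optimizer.apply_gradients([(grad, image)])
    image.assign(regularize(image))  # keep pixel values in [0, 1]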
def run_training():
    if not os.path.exists(FLAGS.checkpoint_dir):
        os.mkdir(FLAGS.checkpoint_dir)
    input_data = tf.placeholder(tf.int32, shape=[FLAGS.batch_size, None])
    shift_data = tf.placeholder(tf.int32, shape=[FLAGS.batch_size, None])
    length_data = tf.placeholder(tf.int32, shape=[FLAGS.batch_size])
    ops = BuildModel.model(input_data, shift_data, length_data)
    saver = tf.train.Saver(tf.global_variables())
    # the original defined a global-only init op plus an unused grouped one; merged here
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    with tf.Session() as sess:
        sess.run(init_op)
        checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        start_epoch = 0
        if checkpoint:
            saver.restore(sess, checkpoint)
            print('[INFO] Resuming training from the last checkpoint:\t%s' % checkpoint)
            start_epoch += int(checkpoint.split('-')[-1])
        try:
            for epoch in range(start_epoch, FLAGS.epochs):
                #data_generator = DataPreparation.sentence_gen('content_law_labeled.txt')
                data_generator = DataPreparation.sentence_gen('test.txt', True)
                try:
                    count = 0
                    while True:
                        input_list = []
                        shift_list = []
                        length_l = []
                        max_len = 0  # renamed from max to avoid shadowing the builtin
                        total_len = 0
                        for i in range(FLAGS.batch_size):
                            r1, r2 = next(data_generator)
                            input_list.append(r1)
                            shift_list.append(r2)
                            length_l.append(r1.shape[0])
                            total_len += r1.shape[0]
                            if r1.shape[0] > max_len:
                                max_len = r1.shape[0]
                        input_arr = padArray(input_list, max_len)
                        shift_arr = padArray(shift_list, max_len)
                        loss, state, _, f_count = sess.run(
                            [ops['total_loss'], ops['last_state'],
                             ops['train_op'], ops['result_false']],
                            feed_dict={
                                input_data: input_arr,
                                shift_data: shift_arr,
                                length_data: length_l
                            })
                        count += 1
                        acc = float(total_len - f_count) / total_len
                        if count % 1 == 0:
                            print('[INFO] Epoch: %d \tBatch: %d training loss: %.6f accuracy: %.4f'
                                  % (epoch, count, loss, acc))
                except StopIteration:
                    print('[INFO] Epoch %d finished, saving the run state...' % epoch)
                    saver.save(sess, os.path.join(FLAGS.checkpoint_dir, 'rnnws'),
                               global_step=epoch)
        except KeyboardInterrupt:
            print('[INFO] Training interrupted manually; saving the run state...')
            saver.save(sess, os.path.join(FLAGS.checkpoint_dir, 'rnnws'),
                       global_step=epoch)
            print('[INFO] Save complete')
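# padArray is called by run_training above but is not defined in this snippet.
# A minimal sketch of what it plausibly does, assuming each element of
# arr_list is a 1-D integer numpy array: right-pad every sequence with zeros
# to max_len so the batch stacks into one [batch_size, max_len] matrix.
import numpy as np

def padArray(arr_list, max_len):
    padded = np.zeros((len(arr_list), max_len), dtype=np.int32)
    for i, arr in enumerate(arr_list):
        padded[i, :arr.shape[0]] = arr  # copy the sequence; the tail stays zero
    return padded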
#               verbose=2,
#               batch_size=batch_size_L1)
####################### CNN Level 1 ########################
# if L1_model == 1:
#     print('Create model of CNN')
#     model = BuildModel.buildModel_CNN(word_index, embeddings_index, number_of_classes_L1,
#                                       MAX_SEQUENCE_LENGTH, EMBEDDING_DIM, 1)
#     model.fit(X_train, y_train[:, 0],
#               validation_data=(X_test, y_test[:, 0]),
#               epochs=epochs,
#               verbose=2,
#               batch_size=batch_size_L1)
####################### RNN Level 1 ########################
if L1_model == 2:
    print('Create model of RNN in level 1')
    model = BuildModel.buildModel_RNN(word_index, embeddings_index, number_of_classes_L1,
                                      MAX_SEQUENCE_LENGTH, EMBEDDING_DIM)
    model.fit(X_train, y_train[:, 0],
              validation_data=(X_val, y_val[:, 0]),
              epochs=epochs,
              verbose=2,
              batch_size=batch_size_L1)

HDLTex = []  # the Level 2 models are a list of deep structures
###################### DNN Level 2 ################################
# if L2_model == 0:
#     for i in range(0, number_of_classes_L1):
#         print('Create Sub model of', i)
#         HDLTex.append(Sequential())
#         HDLTex[i] = BuildModel.buildModel_DNN(content_L2_Train_DNN[i].shape[1],
#                                               number_of_classes_L2_DNN[i], 2, 1024, dropout=0.5)
#         HDLTex[i].fit(content_L2_Train_DNN[i], L2_Train_DNN[i],
#                       validation_data=(content_L2_Test_DNN[i], L2_Test_DNN[i]),
d_train, df_test, word_index, embeddings_index = \
    loadData_Tokenizer(DATASET, MAX_NB_WORDS, MAX_SEQUENCE_LENGTH)
print("Loading Data is Done")

# initiate the models
counter = 1  # keep track of the number of models created
print('Create model of RNN in level 1')
embedding_matrix = np.random.random((len(word_index) + 1, EMBEDDING_DIM))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in the embedding index will be all-zeros
        embedding_matrix[i] = embedding_vector
model = BuildModel.buildModel_RNN(word_index, embedding_matrix,
                                  d_train.number_of_classes,
                                  MAX_SEQUENCE_LENGTH, EMBEDDING_DIM)

HDLTex1 = []  # the Level 2 models are a list of deep structures
HDLTex2 = [[] for i in range(d_train.number_of_classes)]  # the Level 3 models are a list of lists of deep structures
for i in range(0, d_train.number_of_classes):
    print('Create Sub model of level 1: ', i)
    HDLTex1.append(Sequential())
    HDLTex1[i] = BuildModel.buildModel_RNN(word_index, embedding_matrix,
                                           d_train.childs[i].number_of_classes,
                                           MAX_SEQUENCE_LENGTH, EMBEDDING_DIM)
    counter += 1

for i in range(0, d_train.number_of_classes):
    for j in range(0, d_train.childs[i].number_of_classes):
1: Tokenizer that is using GLOVE
1: loadData that is using counting words or tf-idf
'''
X_train, y_train, X_test, y_test, content_L2_Train, L2_Train, content_L2_Test, L2_Test, \
    number_of_classes_L2, word_index, embeddings_index, number_of_classes_L1 = \
    Data_helper.loadData_Tokenizer(MAX_NB_WORDS, MAX_SEQUENCE_LENGTH)
X_train_DNN, y_train_DNN, X_test_DNN, y_test_DNN, content_L2_Train_DNN, L2_Train_DNN, \
    content_L2_Test_DNN, L2_Test_DNN, number_of_classes_L2_DNN = Data_helper.loadData()
print("Loading Data is Done")

####################### DNN Level 1 ########################
if L1_model == 0:
    print('Create model of DNN')
    model = BuildModel.buildModel_DNN(X_train_DNN.shape[1], number_of_classes_L1, 8, 64, dropout=0.25)
    model.fit(X_train_DNN, y_train_DNN[:, 0],
              validation_data=(X_test_DNN, y_test_DNN[:, 0]),
              epochs=epochs,
              verbose=2,
              batch_size=batch_size_L1)

####################### CNN Level 1 ########################
if L1_model == 1:
    print('Create model of CNN')
    model = BuildModel.buildModel_CNN(word_index, embeddings_index,
                                      number_of_classes_L1,
                                      MAX_SEQUENCE_LENGTH, EMBEDDING_DIM,
                                      1)  # trailing argument completed from the commented CNN call elsewhere in this file; the original was cut off after the comma
import time
import BuildModel as starter
import numpy

MAX_TRAIN_TIME = 30 * 60  # 30 minutes (the original comment said "3 minutes")

reader = starter.DatasetReader()
# it_isdt_train_tagged
# ja_gsd_train_tagged
train_filename = '/Users/nishatiwari/Documents/deep learning nlp/hmm-training-data/ja_gsd_train_tagged.txt'
test_filename = train_filename.replace('_train_', '_dev_')
term_index, tag_index, train_data, test_data = reader.ReadData(train_filename, test_filename)
(train_terms, train_tags, train_lengths) = train_data
(test_terms, test_tags, test_lengths) = test_data

num_terms = max(train_terms.max(), test_terms.max()) + 1
model = starter.SequenceModel(train_terms.shape[1], num_terms, train_tags.max() + 1)
model.train_data = train_data
language_name = "Italian"  # note: the filename above points at the Japanese (ja_gsd) data

# def get_test_accuracy():
predicted_tags = model.run_inference(test_terms, test_lengths)
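# The fragment ends right after run_inference; a hedged sketch of the masked,
# token-level accuracy that the commented-out get_test_accuracy presumably
# computed. Positions beyond each sentence's true length are padding and are
# excluded from the count.
def get_test_accuracy():
    correct, total = 0, 0
    for row in range(test_tags.shape[0]):
        n = test_lengths[row]
        correct += numpy.sum(predicted_tags[row, :n] == test_tags[row, :n])
        total += n
    return float(correct) / total

print('test accuracy: %.4f' % get_test_accuracy())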