def firstStep(metaOptimizer, smallTrainFilename, smallTestFilename):
    """Run a 700-iteration random search on the small train/test datasets.

    Both files are read with ``loadDataset``; the optimizer is initialized in
    "random" mode, executed, and its result is stored on
    ``metaOptimizer.optimized``. The entries of ``grid_scores_`` are then
    ranked by mean score, best first.

    Args:
        metaOptimizer: object exposing ``initialize_optimizer`` and
            ``algorithm``.
        smallTrainFilename: path handed to ``loadDataset`` for training data.
        smallTestFilename: path handed to ``loadDataset`` for test data.
    """
    trainData, trainLabel = loadDataset(smallTrainFilename)
    testData, testLabel = loadDataset(smallTestFilename)
    metaOptimizer.initialize_optimizer(
        "random", None, trainData, trainLabel, testData, testLabel,
        jobs=-1, iterations=700, scoresCsvFilename=None,
    )
    metaOptimizer.optimized = metaOptimizer.algorithm()
    # Fix: the search result lives on metaOptimizer.optimized; the original
    # read a bare ``optimized`` name that is never bound in this function.
    grid_scores = metaOptimizer.optimized.grid_scores_
    # NOTE(review): sortedScores is not consumed in the visible part of this
    # function - confirm whether a return/usage was lost.
    sortedScores = sorted(
        [
            (mean_score, scores.std() / 2, params, scores)
            for params, mean_score, scores in grid_scores
        ],
        reverse=True,
    )
def mix(gans, num_real, num_synth):
    """Build a loader mixing real and synthetic images.

    :return: a Dataloader with num_real real and num_synth synthetic images
    """
    # Guard clauses: a non-positive count on one side means the other side
    # is returned unmixed.
    if num_real <= 0:
        return utils.gen_synth_data(gans, n_entries=num_synth)
    if num_synth <= 0:
        real_only = utils.loadDataset(train_size=num_real, batch_size=100)
        return real_only[1]

    synthetic_loader = utils.gen_synth_data(gans, n_entries=num_synth)
    _, real_loader = utils.loadDataset(train_size=num_real, batch_size=100)
    # Real entries come first in the concatenation along dimension 0.
    combined = torch.cat((real_loader.dataset, synthetic_loader.dataset), 0)
    return torch.utils.data.DataLoader(combined, batch_size=100, shuffle=True)
def main(outputName):
    """Load the three tree datasets and train once per hyperparameter combo.

    The grid is the cartesian product of the module-level lists
    miniBatchSize, adagradResetNbIter, learningRate and regularisationTerm;
    nbEpoch is passed through unchanged. outputName is forwarded to
    train.train.
    """
    print("Welcome into RNN implementation, (recording will be on ", outputName, ")")

    # Fixed seeds for replication
    random.seed("MetaMind")
    np.random.seed(7)

    print("Parsing dataset, creating dictionary...")
    vocabulary.initVocab()

    # Load each split and announce it right after it is available.
    datasets = {}
    splits = (
        ('training', "trees/train.txt", "Training loaded !"),
        ('testing', "trees/test.txt", "Testing loaded !"),
        ('validating', "trees/dev.txt", "Validation loaded !"),
    )
    for split, path, banner in splits:
        datasets[split] = utils.loadDataset(path)
        print(banner)

    print("Datasets loaded !")
    print("Nb of words", vocabulary.vocab.length())

    # Grid search on the hyperparameters (plain train/test, no k-fold).
    for mBS in miniBatchSize:
        for aRNI in adagradResetNbIter:
            for lR in learningRate:
                for rT in regularisationTerm:
                    params = {
                        "nbEpoch": nbEpoch,
                        "learningRate": lR,
                        "regularisationTerm": rT,
                        "adagradResetNbIter": aRNI,
                        "miniBatchSize": mBS,
                    }
                    model = train.train(outputName, datasets, params)

    # TODO: Plot the cross-validation curve
    # TODO: Plot a heat map of the hyperparameters cost to help tuning them
    print("The End. Thank you for using this program!")
def main(outputName):
    """Grid-search the RNTN hyperparameters on the train/test/dev trees.

    Every combination of the module-level miniBatchSize, adagradResetNbIter,
    learningRate and regularisationTerm values triggers one call to
    train.train, which is expected to return a (model, errors) pair.
    """
    print("Welcome into RNTN implementation 0.6 (recording will be on ", outputName, ")")

    random.seed("MetaMind")  # fixed seed for replication
    np.random.seed(7)

    print("Parsing dataset, creating dictionary...")
    vocabulary.initVocab()

    datasets = {}

    def _load(key, path, message):
        # Store one parsed split, then report it.
        datasets[key] = utils.loadDataset(path)
        print(message)

    _load('training', "trees/train.txt", "Training loaded !")
    _load('testing', "trees/test.txt", "Testing loaded !")
    _load('validating', "trees/dev.txt", "Validation loaded !")

    print("Datasets loaded !")
    print("Nb of words", vocabulary.vocab.length())

    # Too long for a complete k-fold cross validation, so just train/test.
    for mBS in miniBatchSize:
        for aRNI in adagradResetNbIter:
            for lR in learningRate:
                for rT in regularisationTerm:
                    params = dict(
                        nbEpoch=nbEpoch,
                        learningRate=lR,
                        regularisationTerm=rT,
                        adagradResetNbIter=aRNI,
                        miniBatchSize=mBS,
                    )
                    model, errors = train.train(outputName, datasets, params)

    # TODO: Plot the cross-validation curve
    # TODO: Plot a heat map of the hyperparameters cost to help tuning them
    print("The End. Thank you for using this program!")
def main(args):
    """Extract features, train a classifier, score it and pickle the model.

    Args:
        args: object with ``extractor`` (feature-extraction method) and
            ``classifier`` (classifier name) attributes.

    The trained model is written to ``model/<extractor>_<classifier>.pkl``.
    """
    extractor = args.extractor
    classifier = args.classifier

    # Load dataset into memory
    dataset, labelset = loadDataset()

    # Feature extraction
    feature_vectors = featureExtraction(dataset=dataset, method=extractor)

    # Split dataset
    print("Step 3. Split dataset into training data and test data")
    print("\tSplitting data...")
    X_train, X_test, y_train, y_test = train_test_split(
        feature_vectors, labelset, test_size=TEST_SIZE)
    print("\tSplit dataset successfully !")
    print("\tThe size of training data: {}".format(len(X_train)))
    print("\tThe size of test data: {}".format(len(X_test)))

    # Train the model with the requested classifier.
    model = trainModel(dataset=X_train, label=y_train, classifier=classifier)

    # Test model on test data.
    print("Step 5. Test model on test data")
    print("\tTesing model ...")
    accuracy = model.score(X_test, y_test)
    print("\tThe accuary of model is {} %".format(accuracy * 100))

    # Save model
    print("Step 6. Save model into disk")
    print("\tSaving ...")
    with open('model/{}_{}.pkl'.format(extractor, classifier), 'wb') as fid:
        # Fix: dump the model trained above; the original dumped ``gnb``,
        # a name that is never defined in this function.
        cPickle.dump(model, fid)
    # The original passed a date string as a second format argument, but the
    # template has a single placeholder, so it was never printed.
    print("\tSave model successully with name {}".format(classifier))
def process_arguments():
    """Parse the command line and bundle everything into ClassifierTestParams.

    Returns:
        A ClassifierTestParams built from the search type, iteration count,
        model directory, evaluations file, the loaded train/test data and
        labels, and the job count.
    """
    cli = argparse.ArgumentParser(
        description='Optimize set of different classifiers with meta-parameter optimization')

    # Positional arguments, in the order they must appear on the command line.
    positionals = (
        ('train', 'Train dataset'),
        ('test', 'Test dataset'),
        ('modelDirectory', 'Directory to save best models'),
        ('evaluationsFilename', 'Filename to save models result'),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)

    cli.add_argument('-t', '--type', default='grid',
                     choices=['grid', 'random', 'pso'], help='Search type')
    cli.add_argument('-i', '--iterations', default=-1, type=int,
                     help='Iterations amount for pso and random search')
    cli.add_argument('-j', '--jobs', default=-1, type=int,
                     help='Processes amount for learning')

    args = cli.parse_args()

    trainData, trainLabel = loadDataset(args.train)
    testData, testLabel = loadDataset(args.test)

    return ClassifierTestParams(
        args.type, args.iterations, args.modelDirectory,
        args.evaluationsFilename, trainData, trainLabel, testData, testLabel,
        args.jobs)
def main():
    """Load a small set of real images and hand the loaders to self_learn."""
    # number of real images per Dataloader to use at the beginning
    real_size = 100
    source_files = {
        'image_path': './mnist/train-images-idx3-ubyte',
        'label_path': './mnist/train-labels-idx1-ubyte',
    }
    loaded = utils.loadDataset(train_size=real_size, batch_size=25,
                               **source_files)
    # Only the first element of the pair is used here.
    dataloaders, _ = loaded
    self_learn(dataloaders, train_size=real_size)
def main(model_size=1, trial=2):
    """Load the default dataset and run a short repeatTrain session.

    model_size is forwarded to repeatTrain as ``alpha``; trial is forwarded
    under its own name.
    """
    num_gans = 1
    dataloaders = utils.loadDataset()
    # Longer schedule kept for reference:
    # repeatTrain(dataloaders, trial = trial, epoch_len = 500, end = 2000, alpha = model_size)
    epoch_len, end = 50, 100
    repeatTrain(dataloaders, trial=trial, epoch_len=epoch_len, end=end,
                alpha=model_size)


# The string literal below holds a disabled variant of main; it is a bare
# expression statement and is never executed.
"""
def main(train_size, model_size, trial):
    num_gans = 10
    dataloaders, labeledDataLoader = utils.loadDataset(
        train_size=train_size,
        batch_size=25,
        image_path='./mnist/train-images-idx3-ubyte',
        label_path='./mnist/train-labels-idx1-ubyte')
    repeatTrain(dataloaders, trial=trial, epoch_len=500, end=2000, alpha=model_size)
"""
def main():
    """Compute and print the error of the inputModel-based model on dev.txt."""
    for banner in ("Welcome into RNTN implementation 0.1",
                   "Loading dictionary..."):
        print(banner)

    # Dictionary initialisation from the module-level inputModel
    vocabulary.initVocab(inputModel)

    # Loading dataset
    validationSet = utils.loadDataset("trees/dev.txt")
    print("Validation loaded !")

    # Creating the model
    model = rntnmodel.Model(inputModel)

    print("Computation validation...")
    print("Validation error: ", model.computeError(validationSet, True))
def train_step(test_xs): batch_size = flags.batch_size # batch_size = 50 n_epochs = flags.num_epoch mask = np.random.binomial( 1, 1 - flags.corrupt_prob, (int(np.round(batch_size * flags.validation)) + 1, 225)) # print(mask[:5]) for epoch_i in range(n_epochs): # print dataset_train.shape[1] // batch_size datasets = utils.loadDataset(batch_size=batch_size, max=flags.max, dataset_dir=flags.datasetPath) f = 0 for dataset in datasets: dataset_train, dataset_test = partition(dataset, shuffle=False) mean_img = np.mean(dataset_train, axis=1) dataset_train = np.array( [img - mean_img for img in dataset_train.T]) dataset_train = dataset_train.T dataset_train_, dataset_train = corrupt(dataset_train, mask=mask) _, score, step, summaries = sess.run( [train_op, ae.score, global_step, train_summary_op], feed_dict={ ae.x: dataset_train, ae.x_: dataset_train_ }) current_step = tf.train.global_step(sess, global_step) if current_step % 100 == 0: print("epoch:{} step:{} score:{}".format( epoch_i, step, score)) train_summary_writer.add_summary(summaries, step) if current_step % 1000 == 0: path = saver.save(sess, checkpoint_prefix, global_step=current_step) print("Saved model checkpoint to {}\n".format(path)) # score, step, summaries, output, W= sess.run([ae.score, global_step, dev_summary_op, ae.output, ae.encoder], feed_dict={ # ae.x: test_xs, # ae.x_: test_xs}) # print("evaluation:\nscore:{}".format(score)) test_xs = np.asarray(test_xs) print("Testxs : " + str(test_xs.shape)) return test_xs
def main():
    """Load the dev trees, build the model and report its validation error."""
    print("Welcome into RNTN implementation 0.1")

    # Dictionary initialisation
    print("Loading dictionary...")
    vocabulary.initVocab(inputModel)

    # Loading dataset
    dev_trees = utils.loadDataset("trees/dev.txt")
    print("Validation loaded !")

    # Creating the model
    network = rntnmodel.Model(inputModel)

    print("Computation validation...")
    dev_error = network.computeError(dev_trees, True)
    print("Validation error: ", dev_error)
def getFeatures(numWordsToUse):
    """Return (sparse feature matrix, sentiment) for the aggregated corpus.

    Only rows with an extreme score are kept (raw 1 or 5, i.e. 0 or 4 after
    shifting to the 0-4 scale) and they are relabelled 0 / 1. Sets the
    module-level ``popularWords`` as a side effect.
    """
    global popularWords

    allTweets, allTweetsSentiment, allRows = utils.loadDataset('twitterCorpus/aggregatedCorpusCleaned.csv',1,2)

    tweets, sentiment = [], []
    for position, row in enumerate(allRows):
        if position == 0:
            # skip header row
            continue
        # the aggregatedCorpus has sentiment scores from 1 - 5, while STS
        # has scores from 0 - 4
        shifted = str(int(row[0]) - 1)
        if shifted not in ('0', '4'):
            # not a fairly extreme sentiment
            continue
        tweets.append(row[2])
        sentiment.append(1 if shifted == '4' else 0)

    tokenizedTweets, cleanedSentiment = utils.tokenize(tweets, sentiment)
    formattedTweets, sentiment, popularWords = utils.nlpFeatureEngineering(
        tokenizedTweets, cleanedSentiment, 0, numWordsToUse, 'counts')

    # transform list of dictionaries into a sparse matrix
    sparseFeatures = dv.fit_transform(formattedTweets)
    return sparseFeatures, sentiment
def getFeatures(numWordsToUse):
    """Build count features for the extreme-sentiment tweets of the corpus.

    Returns the matrix produced by ``dv.fit_transform`` and the sentiment
    list returned by ``utils.nlpFeatureEngineering``; also rebinds the
    module-level ``popularWords``.
    """
    global popularWords

    allTweets, allTweetsSentiment, allRows = utils.loadDataset(
        'twitterCorpus/aggregatedCorpusCleaned.csv', 1, 2)

    # Corpus scores run 1-5 while STS uses 0-4; after shifting by one only
    # the two extremes are kept and mapped onto a binary label.
    binary_label = {'0': 0, '4': 1}

    tweets = []
    sentiment = []
    rows = iter(allRows)
    next(rows, None)  # skip header row
    for row in rows:
        key = str(int(row[0]) - 1)
        if key in binary_label:
            tweets.append(row[2])
            sentiment.append(binary_label[key])

    tokenizedTweets, cleanedSentiment = utils.tokenize(tweets, sentiment)
    engineered = utils.nlpFeatureEngineering(
        tokenizedTweets, cleanedSentiment, 0, numWordsToUse, 'counts')
    formattedTweets, sentiment, popularWords = engineered

    # transform list of dictionaries into a sparse matrix
    return dv.fit_transform(formattedTweets), sentiment
model.add(LSTM(150, dropout=0.2, recurrent_dropout=0.2)) elif networkType == GRU_LABEL: model.add(GRU(150, dropout=0.2, recurrent_dropout=0.2)) elif networkType == MLP_LABEL: model.add(Flatten()) model.add(Dense(50, activation='relu')) model.add(Dense(27, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) return model if __name__ == '__main__': arguments = parseArgs() dataset = utils.loadDataset(arguments.reprocessDataset) xData, yData = dataset[constants.TWEET_COLUMN], dataset[ constants.LABEL_COLUMN] vocabularySize = 13000 xEncoder, yEncoder = Tokenizer( num_words=vocabularySize), preprocessing.OneHotEncoder() print('Encoding and splitting xData, yData') xDataEncoded, yDataEncoded = encodeX(xEncoder, xData), encodeY(yEncoder, yData) xTrain, xValid, yTrain, yValid = model_selection.train_test_split( xDataEncoded, yDataEncoded) model = buildModel(vocabularySize, arguments.networkType) print(model.summary())
# Number of max historicaal records to keep for predicting time_window_threshold = 30 refresh_time_interval = 15 if args.mode == 'train': logging.info('Mode: Training') logging.info('Evaluation: ' + str(True if args.evaluate in ('True', 'true') else False)) # Initialization of training params # Time interval to resample data sample_time = '60S' logging.info('Loading train & validation dataset') df = loadDataset(trainset) # Resample every 15 sec, because prometheus & amari exporter have different timestamps df = df.resample('15S', closed='right', label='left').mean() # Fill train df with 0s if there are NaNs and Ifns df.fillna(0, inplace=True) logging.info('Saving normalization values') normalization_stats = saveNormalizationStats(df, cols_to_normalize) saveDictJson(normalization_stats, stats_json) logging.info('Normalizing train & validation data') for col in cols_to_normalize: df[col + '_normalized'] = normalizeFeature(
from Net import Net, test_model
from utils import confusion, F1_score, loadDataset, saveNNParas
import time

# Rebuild the network with the same layer sizes and restore its saved weights.
feature_num = 11
hidden_num = 30
output_num = 3
load_net = Net(feature_num, hidden_num, output_num)
load_net.load_state_dict(torch.load('net_model_genre.pt'))
#load_net.load_state_dict(torch.load('net_model_subjective_rating.pt'))
load_net.eval()

# Loading testing dataset to evaluate new network.
# NOTE(review): both the "train" and "test" variables are loaded from the
# 'testing' split - confirm this is intentional.
x_train, y_train = loadDataset('testing')
x_test, y_test = loadDataset('testing')

# Loading the information of vector: the first three spreadsheet columns are
# exposed as 'row', 'col' and 'vector'.
vectors = pd.read_excel('vector_angle_sample.xls', header=None)
raw_df = pd.DataFrame({
    'row': vectors.iloc[:, 0],
    'col': vectors.iloc[:, 1],
    'vector': vectors.iloc[:, 2]
})

# Sorting by the values of vector angle in ascending order.
increase_res = raw_df.sort_values('vector', ascending=True)
# Distinct row / col values, in the order they appear after sorting.
unique_row = increase_res.row.unique()
unique_col = increase_res.col.unique()
for i in xrange(stateNum): for j in xrange(stateNum): fout.write(str(A[i,j])) fout.write(" ") fout.write("\n") def dataCorrectRate(Set): correct=0 totalNum=0 for i in xrange(len(Set[0])): maxNum=0 maxState=-1 for j in xrange(len(Set[0][i])): if(Set[0][i][j]>maxNum): maxNum=Set[0][i][j] maxState=j if(Set[1][i]==maxState): correct=correct+1 totalNum+=1 print "correct rate:",float(correct)/float(totalNum) if __name__ == '__main__': dataSet=loadDataset("../../pkl/t26v31.pkl",3) #dataSet=loadDataset("/home/roylu/share/DNNResult/t23.7v29.95/t23.7v29.95.pkl",3) dataCorrectRate(dataSet[0]) trainHMM(dataSet) #get_result("A_with_error:0.281822988462")
def run():
    """Train (optionally), save, reload and evaluate the citation model.

    Everything is driven by the ``config`` module: batch sizes, device, model
    name, dropout, learning rate, epochs, warm-up proportion, the ``dotrain``
    switch and the output path templates.

    Raises:
        ValueError: if ``config.modelName`` is neither "BertBase" nor
            "SciBert".
    """
    trainDataset, testDataset, labelGenerator = utils.loadDataset()

    # Making DataLoaders
    trainDataLoader = torch.utils.data.DataLoader(
        trainDataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        shuffle=True,
        num_workers=4,
        pin_memory=True)
    testDataLoader = torch.utils.data.DataLoader(
        testDataset, batch_size=config.TEST_BATCH_SIZE, num_workers=1)

    totalNOsOfLabels = len(labelGenerator.classes_)
    device = torch.device(config.DEVICE)

    # Defining Model
    print("Making model:- ", config.modelName)
    if config.modelName == "BertBase":
        citemodel = model.BERTBaseUncased(numOfLabels=totalNOsOfLabels,
                                          dropout=config.DROPOUT)
    elif config.modelName == "SciBert":
        citemodel = model.SciBertUncased(numOfLabels=totalNOsOfLabels,
                                         dropout=config.DROPOUT)
    else:
        # Fix: the original initialised a differently spelled ``citeModel``
        # and crashed with a NameError further down for unknown names.
        raise ValueError(
            "Unsupported config.modelName: {!r}".format(config.modelName))
    citemodel.to(device)

    param_optimizer = list(citemodel.named_parameters())
    # Parameters whose name contains one of these substrings get no weight
    # decay; every other parameter uses 0.01.
    # NOTE(review): the original comments said LayerNorm.weight should be
    # decayed and quoted different decay values than the code uses - confirm
    # which is intended.
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.01,
        },
        {
            "params": [
                p for n, p in param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int(len(trainDataLoader) * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=config.LEARNING_RATE)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_train_steps * config.WARMUP_PROPORTION,
        num_training_steps=num_train_steps)

    model_path = config.MODEL_SAVED.format(config.modelName)

    if config.dotrain:
        print('In Training')
        for epoch in range(config.EPOCHS):
            trainingLoss = engine.train(trainDataLoader, citemodel, optimizer,
                                        device, scheduler)
            print("Epoch: ", epoch, " Loss: ", trainingLoss, '\n')

        # Saving the model
        # NOTE(review): saving is assumed to happen only after training; the
        # collapsed source does not show the original indentation.
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        torch.save(citemodel.state_dict(), model_path)
        print('Model is saved at: ', model_path)

    # Evaluating the model: always reload the weights from disk first.
    print("Loading the model")
    citemodel.load_state_dict(torch.load(model_path))
    outputs, targets = engine.eval(testDataLoader, citemodel, device)

    # Saving the results with corresponding targets
    predictions_path = config.PREDICTIONS_PATH.format(config.modelName)
    os.makedirs(os.path.dirname(predictions_path), exist_ok=True)
    with open(predictions_path, 'wb') as f:
        pickle.dump(outputs, f)  # First saved the predicted outputs
        pickle.dump(targets, f)  # Then saved the corresponding targets

    print('Starting Evaluation...')
    utils.metric(outputs, targets)
def optimize_parameters(
    parameters, dataset, train_steps=1000, pretrain_steps=250
):
    """
    Grid-search the ARL hyperparameters with 5-fold cross-validation.

    args:
        parameters: a dictionary with the hyperparameters and their values,
            e.g. {'batch_size': [32, 64, 256], [...]}. Each combination of
            its values is unpacked as
            (batch_size, learner_lr, adversary_lr).
        dataset: name of the dataset ([toy_data, uci_adult, compas, law]).
        train_steps: forwarded to train_for_n_iters.
        pretrain_steps: forwarded to train_for_n_iters.

    returns:
        (results, best_auc, best_step), where results maps "batch_size",
        "lr_learner" and "lr_adversary" to the best combination.
    """
    # Load the training data.
    train_dataset = loadDataset(
        dataset=dataset,
        train_or_test="train",
        embedding_size=32,
    )

    # Default model parameters; batch_size and learning rates are filled in
    # per combination below.
    model_params = {
        "embedding_size": train_dataset.categorical_embedding_sizes,
        "n_num_cols": len(train_dataset.mean_std.keys()),
        "learner_hidden_units": [64, 32],
        "adversary_hidden_units": [32],
        "batch_size": None,
    }
    lr_params = {"learner": None, "adversary": None}

    cross_val = KFold(n_splits=5)
    steps = None
    params2aucs = defaultdict(list)

    # Materialise every combination once so it can be both counted and
    # iterated.
    options = list(itertools.product(*parameters.values()))
    n_options = len(options)

    for i, (batch_size, learner_lr, adversary_lr) in enumerate(options, 1):
        iter_start = time.time()
        combo = (batch_size, learner_lr, adversary_lr)

        model_params["batch_size"] = batch_size
        lr_params["learner"] = learner_lr
        lr_params["adversary"] = adversary_lr

        print(
            f"--- ({i}/{n_options}) batch_size: {batch_size}, "
            f"learner_lr: {learner_lr}, adversary_lr: {adversary_lr}"
        )

        # 5-fold cross-validation
        for train_index, test_index in cross_val.split(train_dataset):
            metrics = train_for_n_iters(
                train_dataset.get_split(train_index),
                train_dataset.get_split(test_index),
                model_params,
                lr_params,
                average_over=5,
                train_steps=train_steps,
                pretrain_steps=pretrain_steps,
                print_loss=False,
            )
            params2aucs[combo].append(metrics.auc_avg)
            steps = metrics.steps

        iter_stop = time.time()
        mean_best_auc = np.mean(params2aucs[combo], axis=0)
        best_auc_idx = np.argmax(mean_best_auc)
        print(
            f"\t took {iter_stop - iter_start:.0f} seconds | "
            f"best AUC is {mean_best_auc[best_auc_idx]:.3f} on step "
            f"{steps[best_auc_idx]}"
        )

    # Average the folds.
    params2aucs = {
        option: np.mean(aucs, axis=0) for option, aucs in params2aucs.items()
    }

    # Locate the highest AUC over (combination, step).
    params = list(params2aucs.keys())
    aucs = np.array(list(params2aucs.values()))
    param_idx, step_idx = np.unravel_index(np.argmax(aucs), aucs.shape)

    best_params = params[param_idx]
    results = {
        "batch_size": best_params[0],
        "lr_learner": best_params[1],
        "lr_adversary": best_params[2],
    }
    return results, aucs[param_idx, step_idx], steps[step_idx]
from sklearn.feature_extraction import DictVectorizer
# sklearn.cross_validation was removed from scikit-learn; prefer
# model_selection and fall back only on very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

import utils
import trainClassifiers
from sentimentCorpora import nltkMovieReviews
from sentimentCorpora import stsTwitterCorpus
from sentimentCorpora import atcTwitterCorpus
from sentimentCorpora import nltkTwitterCorpus
from sentimentCorpora import nltkTwitterNoEmoticonsCorpus

# load the "training" data
trainingTweets, trainingSentiment, allRows = utils.loadDataset(
    'training.1600000.processed.noemoticon.csv', 10)
trainingTweets, trainingSentiment = utils.tokenize(trainingTweets,
                                                   trainingSentiment)

# load the test data
testTweets, testSentiment, testRows = utils.loadDataset(
    'testdata.manual.2009.06.14.csv', 1)
testTweets, testSentiment = utils.tokenize(testTweets, testSentiment)

# Instead of predicting two categories ('0' and '4') that the algorithm
# doesn't inherently understand are mutually exclusive, turn this into a
# single binary classification problem: '4' -> 1, anything else -> 0.
cleanedTrainingSentiment = [
    1 if score == '4' else 0 for score in trainingSentiment
]
def pretrain(self, batch_size, num_epoch, sess):
    """Train each entry of self.scores in turn, then call self.finetuning.

    Creates a timestamped run directory under ./runs with "checkpoints",
    "params" and "summaries/train" sub-directories. For every index in
    self.dimensions a fresh RMSProp optimizer minimises self.scores[i] for
    num_epoch passes over the batches from utils.loadDataset.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source; confirm loop/if boundaries against the original file.
    """
    graph = tf.Graph()  # not used anywhere below
    # init = tf.global_variables_initializer()
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(
        os.path.join(os.path.curdir, "runs", timestamp))
    print("Writing to {}\n".format(out_dir))
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    params_prefix = os.path.join(out_dir, 'params')
    os.makedirs(params_prefix)
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    # NOTE(review): the path is built with a literal backslash, which is only
    # a directory separator on Windows - confirm target platform.
    with open("%s\\params.txt" % params_prefix, 'w') as params_file:
        # presumably flags is iterable as strings; verify against its type
        params_file.writelines(flags)
    sess.run(tf.global_variables_initializer())
    # sess.run(init)
    for i in range(len(self.dimensions)):
        # learning_rate = 0.01
        # A new step counter and optimizer are created for every layer.
        global_step = tf.Variable(0, trainable=False, name="global_step")
        learning_rate = 0.01
        # learning_rate = tf.train.exponential_decay(0.01, global_step, flags.max / flags.batch_size, 0.98, staircase=True)
        optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                              momentum=self.momentum)
        # optimizer = tf.train.AdamOptimizer(learning_rate)
        # optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        grads_and_vars = optimizer.compute_gradients(self.scores[i])
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)
        # NOTE(review): this initializer runs once per layer; it looks like
        # it also resets variables trained for earlier layers - confirm.
        sess.run(tf.initialize_all_variables())
        # Summaries: only the scalar score is recorded (gradient histogram /
        # sparsity summaries were disabled in the original).
        score_summary = tf.summary.scalar("score", self.scores[i])
        train_summary_op = tf.summary.merge([score_summary])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(
            train_summary_dir, sess.graph)
        for j in range(num_epoch):
            for batch in utils.loadDataset(batch_size,
                                           max=flags.max,
                                           dataset_dir=flags.datasetPath):
                # Fresh binomial mask per batch, same shape as the batch.
                mask_t = np.random.binomial(1, 1 - flags.corrupt_prob,
                                            batch.shape)
                self.batch = batch
                # mean_img = np.mean(batch, axis=1)
                # batch = np.array([img - mean_img for img in batch.T])
                # batch = batch.T
                _, score, step = sess.run(
                    [train_op, self.scores[i], global_step],
                    feed_dict={
                        self.input_x: batch,
                        self.mask: mask_t
                    })
                current_step = tf.train.global_step(sess, global_step)
                time_str = datetime.datetime.now().isoformat()
                if current_step % 100 == 0:
                    print("{}: traning Layer_{} ".format(time_str, i) +
                          "epoch:%d " % j + "step: %d" % step +
                          " score: {}".format(score))
                if current_step % 2000 == 0:
                    # This runs train_op a second time on the same batch in
                    # order to fetch the summaries.
                    _, score, step, summaries = sess.run([
                        train_op, self.scores[i], global_step,
                        train_summary_op
                    ],
                                                         feed_dict={
                                                             self.input_x:
                                                             batch,
                                                             self.mask: mask_t
                                                         })
                    train_summary_writer.add_summary(summaries, step)
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
    # Hand over to the fine-tuning stage with the same session and run dir.
    self.finetuning(batch_size, num_epoch, sess, saver, out_dir)
def finetuning(self, batch_size, num_epoch, sess, saver, out_dir):
    """Build a decoder from transposed self.Ws, train it, then fit an SVM.

    Steps performed:
      1. Stack one sigmoid layer per entry of self.dimensions on top of the
         last self.layer_output, using the transposes of self.Ws.
      2. Minimise the summed squared difference to self.input_x plus an L2
         term with plain gradient descent for num_epoch passes.
      3. Run the whole dataset through the network and fit a OneClassSVM on
         encodes[3], pickling it to ./svm.model.

    The ``saver`` argument is replaced by a new tf.train.Saver inside.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source; confirm block boundaries against the original file.
    """
    current_input = self.layer_output[len(self.dimensions) - 1]
    self.ft_losses = [tf.constant(0.0) for _ in self.dimensions]
    for layer_i, dimension in enumerate(self.dimensions):
        print(2 - layer_i)
        # The index 3 / 2 - layer_i arithmetic and the 225 output width are
        # hard-coded; presumably this expects four layers - TODO confirm.
        if layer_i == 3:
            n_output = 225
        else:
            n_output = self.dimensions[2 - layer_i]
        print(n_output)
        with tf.name_scope("finetuning_decoder_%i" % layer_i):
            # Weights are the transposes of self.Ws taken in reverse order;
            # only the bias is a new variable.
            W = tf.transpose(self.Ws[3 - layer_i], name="W")
            b = tf.Variable(tf.constant(0.1, shape=[n_output]), name="b")
            self.out_put = tf.nn.sigmoid(tf.matmul(current_input, W) + b)
            self.ft_losses[layer_i] += tf.nn.l2_loss(W)
            current_input = self.out_put
    with tf.name_scope('fn_score'):
        loss = tf.pow(self.out_put - self.input_x, 2)
        # Only the L2 term of the last layer_i enters the score.
        self.score = tf.reduce_sum(
            loss, name="score") + self.ft_losses[layer_i] * flags.l2_reg_lambda
    global_step = tf.Variable(0, trainable=False, name="global_step")
    learning_rate = 0.001
    # learning_rate = tf.train.exponential_decay(0.01, global_step, flags.max / flags.batch_size, 0.98, staircase=True)
    # Shadows the saver passed in by the caller.
    saver = tf.train.Saver(tf.global_variables())
    # optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=self.momentum)
    # optimizer = tf.train.AdamOptimizer(learning_rate)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    grads_and_vars = optimizer.compute_gradients(self.score)
    finetune_op = optimizer.apply_gradients(grads_and_vars,
                                            global_step=global_step)
    # NOTE(review): this initializer (and the second one further down) looks
    # like it resets every variable, including pretrained ones - confirm.
    sess.run(tf.initialize_all_variables())
    print("Writing to {}\n".format(out_dir))
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    # Only the scalar score is summarised (gradient histogram / sparsity
    # summaries were disabled in the original).
    score_summary = tf.summary.scalar("score", self.score)
    finetune_summary_op = tf.summary.merge([score_summary])
    finetune_summary_dir = os.path.join(out_dir, "summaries", "finetune")
    finetune_summary_writer = tf.summary.FileWriter(
        finetune_summary_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    print("Starting finetuning")
    for j in range(num_epoch):
        for batch in utils.loadDataset(batch_size,
                                       max=flags.max,
                                       dataset_dir=flags.datasetPath):
            mask_t = np.random.binomial(1, 1 - flags.corrupt_prob,
                                        batch.shape)
            # mean_img = np.mean(batch, axis=1)
            # batch = np.array([img - mean_img for img in batch.T])
            # batch = batch.T
            _, score, step, summaries = sess.run([
                finetune_op, self.score, global_step, finetune_summary_op
            ],
                                                 feed_dict={
                                                     self.input_x: batch,
                                                     self.mask: mask_t
                                                 })
            current_step = tf.train.global_step(sess, global_step)
            time_str = datetime.datetime.now().isoformat()
            if current_step % 100 == 0:
                print("{}: finetuning step: {}".format(time_str, step) +
                      " score: {}".format(score))
            if current_step % 10000 == 0:
                finetune_summary_writer.add_summary(summaries, step)
                path = saver.save(sess,
                                  checkpoint_prefix,
                                  global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))
    # with graph.as_default():
    # with sess.as_default():
    n_examples = 15  # only referenced by the disabled plotting code
    with tf.device('/cpu:0'):
        test_xs = utils.load_whole_dataset(1000000, flags.datasetPath)
        # binomial(1, 1, ...) always yields 1, so this mask is all ones.
        mask = np.random.binomial(1, 1, test_xs.shape)
        score, recon, encodes = sess.run(
            [self.score, self.out_put, self.layer_output],
            feed_dict={
                self.input_x: test_xs,
                self.mask: mask
            })
    # (Plotting of inputs vs. reconstructions was disabled in the original.)
    # Fit a one-class SVM on the fourth entry of self.layer_output and
    # persist it.
    clf = svm.OneClassSVM(kernel='rbf', gamma='auto', nu=1e-3)
    clf.fit(encodes[3])
    with open('./svm.model', 'wb') as m:
        pickle.dump(clf, m)
'If specified, reads and processes the dataset again. ' + 'Else reads an already processed dataset from ' + constants.CLASSIFICATION_DATA_PATH) return parser.parse_args(sys.argv[1:]) def printTopics(model): predicted_topics = model.print_topics(num_topics=5, num_words=5) for i, topics in predicted_topics: print('Words in Topic {}:\n {}'.format(i + 1, topics)) if __name__ == '__main__': arguments = parseArgs() dataset = utils.loadDataset(arguments.reprocessDataset, classification=False, splitWords=True) # Creating dictionary from dataset, where each unique term is assigned an index dictionary = corpora.Dictionary(dataset) # Converting list of documents into Bag of Words using dictionary doc_term_matrix = [dictionary.doc2bow(doc) for doc in dataset] # Training models on the document term matrix modelList = [ LdaModel(doc_term_matrix, num_topics=10, id2word=dictionary, passes=2), LsiModel(doc_term_matrix, num_topics=10, id2word=dictionary) ] for model in modelList:
warnings.filterwarnings("ignore", category=DeprecationWarning)
from sklearn.feature_extraction import DictVectorizer
# sklearn.cross_validation was removed from scikit-learn; prefer
# model_selection and fall back only on very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

import utils
import trainClassifiers
from sentimentCorpora import nltkMovieReviews
from sentimentCorpora import stsTwitterCorpus
from sentimentCorpora import atcTwitterCorpus
from sentimentCorpora import nltkTwitterCorpus
from sentimentCorpora import nltkTwitterNoEmoticonsCorpus

# load the "training" data
trainingTweets, trainingSentiment, allRows = utils.loadDataset(
    'training.1600000.processed.noemoticon.csv', 10)
trainingTweets, trainingSentiment = utils.tokenize(trainingTweets,
                                                   trainingSentiment)

# load the test data
testTweets, testSentiment, testRows = utils.loadDataset(
    'testdata.manual.2009.06.14.csv', 1)
testTweets, testSentiment = utils.tokenize(testTweets, testSentiment)

# Instead of predicting two categories ('0' and '4') that the algorithm
# doesn't inherently understand are mutually exclusive, turn this into a
# single binary classification problem: '4' -> 1, anything else -> 0.
cleanedTrainingSentiment = [
    1 if score == '4' else 0 for score in trainingSentiment
]
class Logger(object):
    """Tee-style stream: everything written goes to the terminal and a log file.

    An instance is meant to be assigned to ``sys.stdout``.  The stream that
    was ``sys.stdout`` at construction time is kept as ``self.terminal`` and
    the log file is opened in append mode.
    """

    def __init__(self, logFilename):
        """Remember the current ``sys.stdout`` and open *logFilename* for appending."""
        self.terminal = sys.stdout
        self.log = open(logFilename, "a")

    def write(self, message):
        """Write *message* to both the terminal and the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both underlying streams.

        Needed because this object stands in for ``sys.stdout``: any code
        calling ``sys.stdout.flush()`` would otherwise fail with
        AttributeError.
        """
        self.terminal.flush()
        self.log.flush()


if __name__ == '__main__':
    P = dnnUtils.Parameters(setting)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(P.outputFilename)
    datasets = utils.loadDataset(filename=P.datasetFilename, totalSetNum=3)

    if not USE_EXIST_MODEL:
        # Train from scratch, mirroring all console output into the log file.
        sys.stdout = Logger(P.logFilename)
        bestModel = dnn.trainDNN(datasets, P)
        bestModelFilename = '../model/' + P.outputFilename + '.model'
        # NOTE(review): the model is saved under P.bestModelFilename, not the
        # bestModelFilename computed on the previous line - confirm which
        # path is intended.
        utils.makePkl(bestModel, P.bestModelFilename)
    else:
        # TODO use filename to build P
        bestModelFilename = sys.argv[2]
        bestModel = utils.loadPkl(bestModelFilename)

    # datasets[1] is written out as 'valid' and datasets[2] as 'test';
    # probabilities are produced for datasets[0] (train file) and datasets[1].
    dnn.getResult(bestModel, datasets[1], P, 'valid', P.validResultFilename)
    dnn.getResult(bestModel, datasets[2], P, 'test', P.testResultFilename)
    dnn.getProb(bestModel, datasets[0], P.trainProbFilename, P)
    dnn.getProb(bestModel, datasets[1], P.validProbFilename, P)
cmdString = int(raw_input("\nDEC> ")) # Exit if cmdString == 0: import sys sys.exit(0) # Image preprocessing and segmentation elif cmdString == 1: imgPath = str(raw_input('\nInsert the path of the folder images: ')) imS.doSegmentation(imgPath) # Model Fitting elif cmdString == 2: datasetPath = str(raw_input('Insert the path of dataset: ')) dataset, datasetfileNames = loadDataset(datasetPath) epochs = int(raw_input('Insert number of epochs: ')) batchSize = int(raw_input('Insert the batch size: ')) dec.fit('adam', 'mean_squared_error', 'mae', dataset, epochs, batchSize) dec.save_model() # Clustering elif cmdString == 3: dec.load_model() testSetPath = str(raw_input('Insert the path of the test set: ')) testSet, testSetFileNames = loadDataset(testSetPath) print '\n', ' ' * 3, '0) KMeans' clustType = int(raw_input("\nDEC> "))
def filler():
    """Load ``../pkl/small_data.pkl`` and run ``fillerCore`` on each of its parts.

    The dataset is requested with a second argument of 3 and its elements
    0, 1 and 2 are treated as the train, validation and test sets.

    :return: tuple ``(train, valid, test)`` of the ``fillerCore`` results,
        in that order.
    """
    splits = loadDataset("../pkl/small_data.pkl", 3)
    # The generator is consumed in order, so fillerCore sees train, then
    # valid, then test.
    return tuple(fillerCore(splits[idx]) for idx in range(3))
MODEL_NAME = 'base' ########################################################### # Define constants REGULARIZE = 0.0001 GRU_REGULARIZE = 0.0005 MAX_LENGTH = 30 DROPOUT = 0.2 HIDDEN_RNN_UNITS = 192 HIDDEN_DENSE_UNITS = 2048 LEARNING_RATE = 0.001 EPOCHS = 100 BATCH_SIZE = 64 ## Load Datasets train_x1, train_x2, train_features, train_y, valid_x1, valid_x2, valid_y, valid_features = loadDataset( ) print('Dataset Loaded') start_time = time.time() ## Load Embedding Matrix (embedding_matrix, vocab_size) = load_embedding_matrix() ## Define Model def build_model(): input_1 = Input(shape=(MAX_LENGTH, )) input_2 = Input(shape=(MAX_LENGTH, )) e = Embedding(vocab_size, 300,
def _sliding_window_errors(df, cols, model, n_steps, window):
    """Predict every sliding window of *df* and collect the per-window errors.

    For each start index in ``range(len(df) - window)`` the rows
    ``[start, start + window)`` of the columns *cols* are turned into
    sequences with ``split_sequences``, predicted with *model*, and scored
    with ``printPredictionErrors``.

    :return: list with one ``printPredictionErrors`` result per window, in
        window order.
    """
    n_features = len(cols)
    errors = []
    for start in range(0, len(df) - window):
        batch = df.iloc[start:start + window][cols].to_numpy()
        X, y = split_sequences(batch, n_steps)
        X = X.reshape((len(X), n_steps, n_features))
        y_hat = model.predict(X, verbose=0)
        errors.append(printPredictionErrors(y, y_hat))
    return errors


def _save_rmse_plot(errors, curves, title, filename):
    """Plot the selected error series against the window index and save it.

    :param errors: list of per-window error dicts.
    :param curves: iterable of ``(error_key, color, label)`` tuples, drawn in
        the given order.
    :param title: plot title.
    :param filename: path the figure is saved to; the figure is cleared
        afterwards.
    """
    xs = list(range(len(errors)))
    for key, color, label in curves:
        plt.plot(xs, [err[key] for err in errors], color=color, label=label)
    plt.title(title)
    plt.xlabel('# of Sequence')
    plt.ylabel('RMSE')
    plt.legend()
    plt.savefig(filename)
    plt.clf()


def evaluate(thresholds_file, cpu_testset, iperf_testset, trainset,
             time_window_threshold):
    """Plot the model's per-window RMSE on the CPU, iperf and training datasets.

    Loads the autoencoder from ``model/5g_autoencoder.h5`` and the
    normalization statistics from ``data_prefix + 'normalization_stats.json'``,
    cleans and normalizes the three datasets, predicts each one in sliding
    windows and writes five figures under ``plots/``.

    :param thresholds_file: not used by this function.
    :param cpu_testset: passed to ``loadDataset`` for the CPU attack data.
    :param iperf_testset: passed to ``loadDataset`` for the iperf attack data.
    :param trainset: passed to ``loadDataset`` for the training data.
    :param time_window_threshold: currently ignored; the window size is
        hard-coded to 30 below.
    :return: None.
    """
    plt.clf()
    stats_json = data_prefix + 'normalization_stats.json'
    model = load_model('model/5g_autoencoder.h5')
    normalization_stats = loadDictJson(stats_json)
    cols_to_normalize = getFeatures()
    cols = [c + '_normalized' for c in cols_to_normalize]
    # NOTE(review): the caller-supplied window is overwritten here, so the
    # argument has no effect.  Kept as-is to preserve the existing plots -
    # confirm whether the parameter should be honoured instead.
    time_window_threshold = 30
    n_steps = 4

    logging.info('Loading evaluation datasets')
    val_df = loadDataset(trainset)
    cpu_df = loadDataset(cpu_testset)
    iperf_df = loadDataset(iperf_testset)

    # Back-fill missing values and zero out infinities, in place.
    frames = (cpu_df, iperf_df, val_df)
    for df in frames:
        df.fillna(method='backfill', inplace=True)
        df.replace([np.inf, -np.inf], 0.0, inplace=True)

    logging.info('Normalizing evaluation data')
    for col in cols_to_normalize:
        col_min = normalization_stats[col + '_min']
        col_max = normalization_stats[col + '_max']
        for df in frames:
            df[col + '_normalized'] = normalizeFeature(
                df, col, col_min, col_max)

    logging.info('Evaluating for CPU and memory metrics')
    cpu_errors = _sliding_window_errors(
        cpu_df, cols, model, n_steps, time_window_threshold)
    _save_rmse_plot(
        cpu_errors,
        [('cpu_rmse', 'blue', 'CPU Percentage Rate (mode=user)')],
        'CPU Attack Dataset',
        'plots/evaluate_cpu.png')

    logging.info('Evaluating for network and 5G metrics')
    iperf_errors = _sliding_window_errors(
        iperf_df, cols, model, n_steps, time_window_threshold)
    _save_rmse_plot(
        iperf_errors,
        [('net_up_rmse', 'green', 'Network Up Rate'),
         ('net_down_rmse', 'purple', 'Network Down Rate')],
        'iperf Attack Dataset',
        'plots/evaluate_iperf_net.png')
    _save_rmse_plot(
        iperf_errors,
        [('net_up_5g_rmse', 'green', '5G Network Up Rate'),
         ('net_down_5g_rmse', 'blue', '5G Network Down Rate')],
        'iperf Attack Dataset',
        'plots/evaluate_iperf_5g.png')

    logging.info('Evaluating with training data')
    val_errors = _sliding_window_errors(
        val_df, cols, model, n_steps, time_window_threshold)
    _save_rmse_plot(
        val_errors,
        [('cpu_rmse', 'blue', 'CPU Percentage Rate (mode=user)'),
         ('mem_rmse', 'red', 'Memory Percentage Rate'),
         ('net_up_rmse', 'green', 'Network Up Rate'),
         ('net_down_rmse', 'purple', 'Network Down Rate')],
        'Training Dataset (Edge Metrics)',
        'plots/evaluate_val_1.png')
    _save_rmse_plot(
        val_errors,
        [('net_up_5g_rmse', 'orange', '5G Network Up Rate'),
         ('net_down_5g_rmse', 'cyan', '5G Network Down Rate')],
        'Training Dataset (5G Metrics)',
        'plots/evaluate_val_2.png')
from Net import Net, test_model
from utils import confusion, F1_score, loadDataset, saveNNParas
import time

# Restore the previously trained network: 11 inputs, 30 hidden units,
# 3 outputs, with weights read from 'ann_net_model_genre.pt'.
feature_num, hidden_num, output_num = 11, 30, 3
load_net = Net(feature_num, hidden_num, output_num)
load_net.load_state_dict(torch.load('ann_net_model_genre.pt'))
# Alternative checkpoint previously used here:
# 'net_model_subjective_rating.pt'
load_net.eval()

# Testing split used to evaluate the new network.
x_test, y_test = loadDataset('testing')

# Vector information: the first three columns of the (header-less) sheet are
# taken as row, col and vector value.
vectors = pd.read_excel('ann_vector_angle_sample.xls', header=None)
raw_df = pd.DataFrame({
    'row': vectors.iloc[:, 0],
    'col': vectors.iloc[:, 1],
    'vector': vectors.iloc[:, 2],
})

# Order by the vector value, smallest first (ascending is the default),
# then collect the distinct row / col values in that order.
increase_res = raw_df.sort_values(by='vector')
unique_row = increase_res['row'].unique()
unique_col = increase_res['col'].unique()

# Initialize all the status parameters.
def evaluate(thresholds_file, cpu_testset, iperf_testset, trainset,
             time_window_threshold):
    """
    Evaluate trained model. If user has not set all thresholds for anomalies,
    evaluation will also set the remaining thresholds. Evaluation uses a
    dataset that contains a CPU stress test, an iperf stress test and the
    data used for training. Thresholds are defined by calculating the RMSEs
    from actual values and taking the 99th percentile of these errors for
    each feature separately and overall. If user has set all thresholds when
    starting the program, these thresholds will be used.

    Besides its parameters, this function reads ``model``, ``cols``,
    ``cols_to_normalize``, ``normalization_stats``, ``n_steps`` and
    ``n_features``, none of which are defined here (presumably module-level
    names - verify against the rest of the file).

    param thresholds_file: File, where user-defined thresholds are saved.
        This will be updated if new thresholds are proposed.
    param cpu_testset: File containing dataset with CPU stress test.
    param iperf_testset: File containing dataset with iperf stress test.
    param trainset: File containing the dataset used for training.
    param time_window_threshold: Time window for keeping the last-n records.
        In evaluation data are predicted in batches of n.
    return: None.
    """
    # Load thresholds from file; start from an empty dict if no file exists.
    thresholds_dict = {}
    if path.exists(thresholds_file):
        thresholds_dict = loadDictJson(thresholds_file)

    logging.info('Loading evaluation datasets')
    val_df = loadDataset(trainset)
    cpu_df = loadDataset(cpu_testset)
    iperf_df = loadDataset(iperf_testset)

    # Back-fill missing values and zero out infinities, in place.
    for df in (cpu_df, iperf_df, val_df):
        df.fillna(method='backfill', inplace=True)
        df.replace([np.inf, -np.inf], 0.0, inplace=True)

    logging.info('Normalizing evaluation data')
    for col in cols_to_normalize:
        col_min = normalization_stats[col + '_min']
        col_max = normalization_stats[col + '_max']
        for df in (cpu_df, iperf_df, val_df):
            df[col + '_normalized'] = normalizeFeature(
                df, col, col_min, col_max)

    cpu_rmse = []
    cpu_rx_rmse = []
    cpu_tx_rmse = []
    net_down_rmse = []
    net_up_rmse = []
    net_down_5g_rmse = []
    net_up_5g_rmse = []
    mem_rmse = []
    total_rmse = []

    logging.info('Evaluating for CPU and memory metrics')
    for sample_start in range(0, len(cpu_df) - time_window_threshold):
        sample_end = sample_start + time_window_threshold
        cpu_df_sample = cpu_df.iloc[sample_start:sample_end]
        # Select required columns for evaluation data batch
        cpu_dataset = cpu_df_sample[cols].to_numpy()
        # Prepare evaluation dataset batch
        X_test_cpu, y_test_cpu = split_sequences(cpu_dataset, n_steps)
        X_test_cpu = X_test_cpu.reshape((len(X_test_cpu), n_steps, n_features))
        # Predict for evaluation dataset batch
        yhat_cpu = model.predict(X_test_cpu, verbose=0)
        cpu_rmse_dict = printPredictionErrors(y_test_cpu, yhat_cpu)
        total_rmse.append(cpu_rmse_dict['rmse_total'])
        cpu_rmse.append(cpu_rmse_dict['cpu_rmse'])
        cpu_rx_rmse.append(cpu_rmse_dict['cpu_rx_rmse'])
        cpu_tx_rmse.append(cpu_rmse_dict['cpu_tx_rmse'])
        mem_rmse.append(cpu_rmse_dict['mem_rmse'])

    logging.info('Evaluating for network and 5G metrics')
    for sample_start in range(0, len(iperf_df) - time_window_threshold):
        sample_end = sample_start + time_window_threshold
        iperf_df_sample = iperf_df.iloc[sample_start:sample_end]
        # Select required columns for evaluation data batch.  Use the window,
        # not the whole frame: predicting the full dataset on every iteration
        # repeated the same errors and ignored the time window.
        iperf_dataset = iperf_df_sample[cols].to_numpy()
        # Prepare evaluation dataset batch
        X_test_iperf, y_test_iperf = split_sequences(iperf_dataset, n_steps)
        X_test_iperf = X_test_iperf.reshape(
            (len(X_test_iperf), n_steps, n_features))
        # Predict for evaluation dataset batch
        yhat_iperf = model.predict(X_test_iperf, verbose=0)
        iperf_rmse_dict = printPredictionErrors(y_test_iperf, yhat_iperf)
        total_rmse.append(iperf_rmse_dict['rmse_total'])
        net_down_rmse.append(iperf_rmse_dict['net_down_rmse'])
        net_up_rmse.append(iperf_rmse_dict['net_up_rmse'])
        net_down_5g_rmse.append(iperf_rmse_dict['net_down_5g_rmse'])
        net_up_5g_rmse.append(iperf_rmse_dict['net_up_5g_rmse'])
        mem_rmse.append(iperf_rmse_dict['mem_rmse'])

    logging.info('Evaluating with training data')
    for sample_start in range(0, len(val_df) - time_window_threshold):
        sample_end = sample_start + time_window_threshold
        val_df_sample = val_df.iloc[sample_start:sample_end]
        # Select required columns for evaluation data batch
        val_dataset = val_df_sample[cols].to_numpy()
        # Prepare evaluation dataset batch
        X_test_val, y_test_val = split_sequences(val_dataset, n_steps)
        X_test_val = X_test_val.reshape((len(X_test_val), n_steps, n_features))
        # Predict for evaluation dataset batch
        yhat_val = model.predict(X_test_val, verbose=0)
        val_rmse_dict = printPredictionErrors(y_test_val, yhat_val)
        total_rmse.append(val_rmse_dict['rmse_total'])
        cpu_rmse.append(val_rmse_dict['cpu_rmse'])
        cpu_rx_rmse.append(val_rmse_dict['cpu_rx_rmse'])
        cpu_tx_rmse.append(val_rmse_dict['cpu_tx_rmse'])
        mem_rmse.append(val_rmse_dict['mem_rmse'])
        net_down_rmse.append(val_rmse_dict['net_down_rmse'])
        net_up_rmse.append(val_rmse_dict['net_up_rmse'])
        net_down_5g_rmse.append(val_rmse_dict['net_down_5g_rmse'])
        net_up_5g_rmse.append(val_rmse_dict['net_up_5g_rmse'])

    # For thresholds that are not defined by the user, use suggested values.
    # np.percentile expects q on a 0-100 scale, so the 99th percentile is
    # q=99 (q=0.99 would be the 0.99th percentile, i.e. almost the minimum).
    # The percentile is only computed for keys that are actually missing.
    suggested_sources = (
        ('cpu_threshold', cpu_rmse),
        ('mem_threshold', mem_rmse),
        ('cpu_tx_threshold', cpu_tx_rmse),
        ('cpu_rx_threshold', cpu_rx_rmse),
        ('net_up_threshold', net_up_rmse),
        ('net_down_threshold', net_down_rmse),
        ('net_5g_up_threshold', net_up_5g_rmse),
        ('net_5g_down_threshold', net_down_5g_rmse),
        ('overall_threshold', total_rmse),
    )
    for threshold_name, errors in suggested_sources:
        if threshold_name not in thresholds_dict:
            thresholds_dict[threshold_name] = np.percentile(errors, 99)

    # Save new thresholds in same file
    saveDictJson(thresholds_dict, thresholds_file)
from reducing_net import reduced_rnn_net
from utils import confusion, F1_score, loadDataset, saveNNParas
import time

# Restore the previously trained recurrent network from 'rnn_model.pt'.
input_dim, hidden_dim, layer_dim = 1, 50, 1
# NOTE(review): the original comment here read "Four kinds of genres within
# 12 songs", yet output_dim is 3 - confirm the intended class count.
output_dim = 3
load_rnn = RNN_model(input_dim, hidden_dim, layer_dim, output_dim)
load_rnn.load_state_dict(torch.load('rnn_model.pt'))
load_rnn.eval()

# Testing split used to evaluate the new network; a trailing axis of size 1
# is appended to the inputs.
x_test, y_test = loadDataset('testing_sequence')
flat_input_test = x_test.unsqueeze(-1)

# Sequence lengths used for padding / packing sequences in the RNN model.
# Each of the 12 lengths is repeated for 4 consecutive entries, giving
# 4 * 12 values.
l = [1104, 1028, 980, 964, 960, 956, 956, 932, 868, 840, 836, 808]
test_seq_lens = np.zeros((4 * 12))
for i, seq_len in enumerate(l):
    test_seq_lens[i * 4:(i + 1) * 4] = seq_len

# Vector information: the first three columns of the (header-less) sheet are
# taken as row, col and angle.
vectors = pd.read_excel('rnn_vector_angle_sample.xls', header=None)
raw_df = pd.DataFrame({
    'row': vectors.iloc[:, 0],
    'col': vectors.iloc[:, 1],
    'angle': vectors.iloc[:, 2],
})
# Merge step: add the hidden-layer weights of units 16, 23 and 22 onto
# units 2, 5 and 8 respectively.
for kept, merged in ((2, 16), (5, 23), (8, 22)):
    load_net.hidden.weight[kept] += load_net.hidden.weight[merged]

# Build the smaller 11-27-3 network and copy over every hidden unit except
# 16, 22 and 23: old units 0-15, 17-21 and 24+ land in new positions 0-15,
# 16-20 and 21+.
new_net = Net(11, 27, 3)
for dst, src in (
        (slice(None, 16), slice(None, 16)),
        (slice(16, 21), slice(17, 22)),
        (slice(21, None), slice(24, None)),
):
    new_net.hidden.weight[dst] = load_net.hidden.weight[src]
    new_net.hidden.bias[dst] = load_net.hidden.bias[src]
    # The output layer consumes hidden units along its second axis.
    new_net.output.weight[:, dst] = load_net.output.weight[:, src]
new_net.output.bias[:] = load_net.output.bias[:]
new_net.eval()

# Reload the test dataset and evaluate the shrunken network.
x_test, y_test = loadDataset()
acc, pred = test_model(new_net, x_test, y_test)
mat = confusion(x_test.size(0), 3, pred, y_test)
print("Confusion Matrix (after pruning):")
print(mat)
F1_score(mat)
label = pp.correctLabel(endIndxGroup = endIndxGroup, name = name, label = label) pp.writeFile(filename = smoothedFilename, name = name, label = label) class Logger(object): def __init__(self, logFilename): self.terminal = sys.stdout self.log = open(logFilename, "a") def write(self, message): self.terminal.write(message) self.log.write(message) if __name__ == '__main__': P = dnnUtils.Parameters(setting) print P.outputFilename datasets = utils.loadDataset(filename = P.datasetFilename, totalSetNum=3) if not USE_EXIST_MODEL: sys.stdout = Logger(P.logFilename) bestModel = dnn.trainDNN(datasets, P) bestModelFilename = '../model/' + P.outputFilename + '.model' utils.makePkl(bestModel, P.bestModelFilename) else: # TODO use filename to build P bestModelFilename = sys.argv[2] bestModel = utils.loadPkl(bestModelFilename) dnn.getResult(bestModel, datasets[1], P, 'valid', P.validResultFilename) dnn.getResult(bestModel, datasets[2], P, 'test', P.testResultFilename) dnn.getProb(bestModel, datasets[0], P.trainProbFilename, P) dnn.getProb(bestModel, datasets[1], P.validProbFilename, P)
def build_new_image(path, k_means_instance, input_dim, encoder, new_name='_new.png'): import sys print '\nSelect the distance you want for the choice of the best centroid' print '\n', ' ' * 3, '0) Mean Absolute Error compute on the encoded representation' print ' ' * 3, '1) Mean Squared Error compute on the encoded representation' print ' ' * 3, '2) Hausdorff distance on the binarized original images' distances = ['mae', 'mse', 'hausdorff'] distance_type = int(raw_input("\nDEC> ")) image = io.imread(path, as_grey=True) processed = image normalized_height = int(np.sqrt(input_dim)) normalized_width = normalized_height processed = imageSegmentation.image_preproc( processed, binary_threshold=threshold_otsu(processed)) labeled_image, num_features, max_width, max_height, max_label = imageSegmentation.find_connected_components( processed) centroidSet, centroidNames = loadDataset('Centroids') centroidImages = centroidSet # Get the encoded representation of the centroids centroidSet = encoder.predict(centroidSet) # Here I create a new image all white that will host each replaced char base_image = 255 * np.ones(image.shape) predictes = list() for i in range(1, max_label): r_s, c_s = np.where(labeled_image == i) if len(r_s) > 1 and len(c_s) > 1: # get the char to be replaced to_replace = image[min(r_s) - 1:max(r_s) + 2, min(c_s) - 1:max(c_s) + 2] # resize before passing through the net to_replace = imresize(to_replace, (normalized_height, normalized_width)) to_predict = to_replace if distance_type != 2: # get the encoded representation of the char to_predict = np.array( to_replace.reshape( (1, normalized_height * normalized_width))) to_predict = to_predict.astype('float32') / 255. 
to_predict = encoder.predict(to_predict) predictes.append([to_predict, r_s, c_s]) pool = ThreadPoolExecutor(8) list_futures = list() for i in range(1, len(predictes)): list_futures.append( pool.submit(find_and_subst, i, base_image, centroidSet, predictes, centroidImages, metric=distances[distance_type])) for i in range(len(list_futures)): r = list_futures[i].result() sys.stdout.write(u'\u001b[1000D' + bcolors.RED + 'Creating: ' + str(ceil(i * 100 / len(list_futures))) + '%') sys.stdout.flush() sys.stdout.write(bcolors.RESET) plt.imsave('temp.png', base_image, cmap=plt.cm.gray)