def trainWithDF(sparkContext, rawDataFrame):
    simMat = cm.getSimilarityMatrixByRDD(sparkContext, rawDataFrame)
    fp.recordSimMatrix(simMat, pv.simMatrixFile)
    model, unifiedRDDVecs = cluster.getClusterModel(sparkContext, simMat, rawDataFrame,
                                                    (pv.truncateLineCount / pv.IDFOREACHCLUSTER),
                                                    pv.dimensionReductionNum, pv.eigenVecFile)
    eva.evaluateModel(model, unifiedRDDVecs)
    fp.outputNodesInSameCluster(model, unifiedRDDVecs, rawDataFrame, pv.clusterIDCenterFile, pv.clusterIDFile)
    decisionTreeModel = classification.process(sparkContext, (pv.truncateLineCount / pv.IDFOREACHCLUSTER),
                                               pv.treeMaxDepth, pv.treeMaxBins, pv.eigenVecFile, pv.clusterIDFile)
    model.save(sparkContext, pv.clusterModelPath)
    Utils.logMessage("\nTrain cluster model finished")
def evaluate(self, config=None):
    if config is None:
        config = self._config["evaluation"]
    evaluation = Evaluation(self.iterators[config['iterator']],
                            self.model,
                            self.candidator,
                            self.stats[config['stats']],
                            sampling=config['sampling'] if 'sampling' in config else None,
                            log_path=self.path + "/evaluation.txt",
                            db=self.db,
                            trained_mentions=self.trained_mentions)
    evaluation.evaluate()
def train(sparkContext):
    fp.truncate(pv.mergedAccountFile, pv.truncatedFile, pv.truncateLineCount)
    fp.preprocess(pv.truncatedFile, pv.processedFile, pv.targetFields)
    pd.read_csv(pv.trainingFile, sep=',', encoding='utf-8').to_csv(pv.fileForClusterModel, index=False, encoding='utf-8')
    rawDataFrame = pd.read_csv(pv.fileForClusterModel, sep=',', encoding='utf-8')
    simMat = cm.getSimilarityMatrix(sparkContext, rawDataFrame)
    fp.recordSimMatrix(simMat, pv.simMatrixFile)
    model, unifiedRDDVecs = cluster.getClusterModel(sparkContext, simMat, rawDataFrame,
                                                    (pv.truncateLineCount / pv.IDFOREACHCLUSTER),
                                                    pv.dimensionReductionNum, pv.eigenVecFile)
    eva.evaluateModel(model, unifiedRDDVecs)
    fp.outputNodesInSameCluster(model, unifiedRDDVecs, rawDataFrame, pv.clusterIDCenterFile, pv.clusterIDFile)
    decisionTreeModel = classification.process(sparkContext, (pv.truncateLineCount / pv.IDFOREACHCLUSTER),
                                               pv.treeMaxDepth, pv.treeMaxBins, pv.eigenVecFile, pv.clusterIDFile)
    model.save(sparkContext, pv.clusterModelPath)
    Utils.logMessage("\nTrain cluster model finished")
def DecisionTreeProcess(trainingSet, testSet, imp, dtMaxDepth, dtMaxBins):
    decisionTreeModel = DecisionTree.trainClassifier(trainingSet, numClasses=4, categoricalFeaturesInfo={},
                                                     impurity=imp, maxDepth=dtMaxDepth, maxBins=dtMaxBins)
    predictions = decisionTreeModel.predict(trainingSet.map(lambda item: item.features))
    trainingLabelsAndPredictions = trainingSet.map(lambda item: item.label).zip(predictions)
    eva.calculateErrorRate("\nClassification model Training set", trainingLabelsAndPredictions)
    predictions = decisionTreeModel.predict(testSet.map(lambda item: item.features))
    testLabelsAndPredictions = testSet.map(lambda item: item.label).zip(predictions)
    eva.calculateErrorRate("\nClassification model Test set", testLabelsAndPredictions)
    return decisionTreeModel
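# A minimal usage sketch for DecisionTreeProcess, assuming a SparkContext `sc` is available
# and pyspark.mllib is importable. The toy features/labels and the 70/30 split below are
# illustrative assumptions only; in the surrounding code the LabeledPoint RDD comes from
# the eigenvector and cluster-ID files.
from pyspark.mllib.regression import LabeledPoint

labeledData = sc.parallelize([LabeledPoint(0, [0.0, 1.0]), LabeledPoint(1, [1.0, 0.0]),
                              LabeledPoint(2, [1.0, 1.0]), LabeledPoint(3, [0.0, 0.0])])
trainingSet, testSet = labeledData.randomSplit([0.7, 0.3])
model = DecisionTreeProcess(trainingSet, testSet, 'entropy', dtMaxDepth=5, dtMaxBins=32)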
def __init__(self, wire, ber, speed, id, stationType="client"):
    super(Station, self).__init__("node" + str(id))
    self.contextProviders.append(
        openwns.probebus.ConstantContextProvider(
            "DLL.StationType", Evaluation.toStaTypeId(stationType)))

    # Physical Layer (PHY)
    self.phy = copper.Copper.Transceiver(self,
                                         # Name of the PHY
                                         "phy",
                                         # Medium to which the instance is attached
                                         wire,
                                         # BER this instance experiences
                                         ber,
                                         # Transmit data rate
                                         speed)

    # Data Link Layer (DLL)
    self.dll = Tutorial.Experiment4(self, "ShortCut",
                                    self.phy.dataTransmission, self.phy.notification,
                                    stationType=stationType)

    # Network Layer (NL)
    domainName = "node" + str(id) + ".glue.wns.org"
    self.nl = ip.Component.IPv4Component(self,
                                         # Name of the NL
                                         domainName + ".ip",
                                         # Domain name
                                         domainName)

    # Connect the NL instance to the DLL interface
    self.nl.addDLL(
        # Name of the DLL interface (only used within the NL module; hence it may differ
        # from the name given to the DLL instance during instantiation)
        _name="glue",
        # Where to get my IP address
        _addressResolver=ip.AddressResolver.VirtualDHCPResolver("theOnlySubnet"),
        # ARP zone
        _arpZone="theOnlySubnet",
        # We can deliver locally
        _pointToPoint=False,
        # DLL SAP for outgoing unicast transmissions
        _dllDataTransmission=self.dll.unicastDataTransmission,
        # DLL SAP for incoming unicast transmissions
        _dllNotification=self.dll.unicastNotification)

    # Traffic generator
    self.load = constanze.node.ConstanzeComponent(self, "constanze", parentLogger=self.logger)
def EvalKLD(summText, pModel):
    KLScore = []
    # Get the model from the summarization text
    summModel = getModel(summText, len(pModel.values())).prob
    summModelCopy = summModel.copy()
    pModelCopy = pModel.copy()
    Niters = 1
    KLScore.append(Ev.getKLScore(summModelCopy, pModelCopy))
    # Niters is the number of iterations over which the score is obtained
    return KLScore
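# A hedged sketch of what Ev.getKLScore is assumed to compute: the KL divergence
# KL(p || q) = sum_w p(w) * log(p(w) / q(w)) between the source unigram model and the
# summary model, with a small epsilon guard for terms absent from the summary.
# The helper name and the smoothing choice are illustrative assumptions, not the
# project's actual implementation.
import math

def kl_score_sketch(summModel, pModel, eps=1e-12):
    score = 0.0
    for w, p in pModel.items():
        q = summModel.get(w, eps)
        if p > 0:
            score += p * math.log(p / max(q, eps))
    return score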
def process(sc, dtClusterNum, dtMaxDepth, dtMaxBins, eigenVecFile, markedClusterFile):
    filteredEigenVec = sc.textFile(eigenVecFile).map(lambda item: removeVirtualPart(item)).collect()
    clusterIDs = sc.textFile(markedClusterFile).map(lambda item: extractClusterID(item)).collect()
    clusterIdEigenVecMapRDD = sc.parallelize(clusterIDs).zip(sc.parallelize(filteredEigenVec))
    labeledClusterIdEigenVecMapRdd = clusterIdEigenVecMapRDD.map(lambda item: LabeledPoint(item[0], item[1]))
    trainingSet, testSet = labeledClusterIdEigenVecMapRdd.randomSplit([0.7, 0.3])
    decisionTreeModel = DecisionTree.trainClassifier(trainingSet, numClasses=dtClusterNum, categoricalFeaturesInfo={},
                                                     impurity='entropy', maxDepth=dtMaxDepth, maxBins=dtMaxBins)
    predictions = decisionTreeModel.predict(trainingSet.map(lambda item: item.features))
    trainingLabelsAndPredictions = trainingSet.map(lambda item: item.label).zip(predictions)
    eva.calculateErrorRate("\nCluster model Training set", trainingLabelsAndPredictions)
    predictions = decisionTreeModel.predict(testSet.map(lambda item: item.features))
    testLabelsAndPredictions = testSet.map(lambda item: item.label).zip(predictions)
    eva.calculateErrorRate("\nCluster model Test set", testLabelsAndPredictions)
    return decisionTreeModel
def boucle_detect_eval(self, range_order, range_seuil):
    if self.distMoy == []:
        self.calcul_dist()
    resultat = [[0.0 for u in range(range_order)] for v in range(range_seuil)]
    for i in range(range_order):
        for j in range(range_seuil):
            self.order = 4 + i
            self.alpha = 1 + 0.1 * j  # as a function of j, to be tuned later
            self.detect()
            resultat[j][i] = Evaluation.evalu()
    return resultat
def trainOptimalModel(trainingData, testData):
    print "\nTraining optimal Random Forest model started!"
    Utils.logTime()

    numTreesVals = [3, 5, 8]
    featureSubsetStrategyVals = ['auto', 'all', 'sqrt', 'log2', 'onethird']
    impurityVals = ['gini', 'entropy']
    maxDepthVals = [3, 4, 5, 6, 7]
    maxBinsVals = [8, 16, 32]

    optimalModel = None
    optimalNumTrees = None
    optimalFeatureSubsetStrategy = None
    optimalMaxDepth = None
    optimalImpurity = None
    optimalBinsVal = None
    minError = None

    try:
        for curNumTree in numTreesVals:
            for curFeatureSubsetStrategy in featureSubsetStrategyVals:
                for curImpurity in impurityVals:
                    for curMaxDepth in maxDepthVals:
                        for curMaxBins in maxBinsVals:
                            model = RandomForest.trainClassifier(trainingData,
                                                                 numClasses=2,
                                                                 categoricalFeaturesInfo={},
                                                                 numTrees=curNumTree,
                                                                 featureSubsetStrategy=curFeatureSubsetStrategy,
                                                                 impurity=curImpurity,
                                                                 maxDepth=curMaxDepth,
                                                                 maxBins=curMaxBins)
                            testErr = Evaluation.evaluate(model, testData)
                            if minError is None or testErr < minError:
                                minError = testErr
                                optimalNumTrees = curNumTree
                                optimalFeatureSubsetStrategy = curFeatureSubsetStrategy
                                optimalImpurity = curImpurity
                                optimalMaxDepth = curMaxDepth
                                optimalBinsVal = curMaxBins
                                optimalModel = model
    except:
        msg = "\nException during model training with below parameters:"
        msg += "\tnum trees: " + str(curNumTree)
        msg += "\tfeature subset strategy: " + curFeatureSubsetStrategy
        msg += "\timpurity: " + str(curImpurity)
        msg += "\tmaxDepth: " + str(curMaxDepth)
        msg += "\tmaxBins: " + str(curMaxBins)
        Utils.logMessage(msg)

    logMessage(optimalModel, optimalNumTrees, optimalFeatureSubsetStrategy, optimalMaxDepth,
               optimalImpurity, optimalBinsVal, minError)
    return optimalModel
def run(searchForOptimal, basepath, filepath):
    sc = buildContext()
    trainingData, testData = loadData(sc, basepath, filepath)
    if searchForOptimal:
        optimalRandomForestModel = RandomForest.trainOptimalModel(trainingData, testData)
        Evaluation.evaluate(optimalRandomForestModel, testData, logMessage=True)
        optimalDecisionTreeModel = DecisionTree.trainOptimalModel(trainingData, testData)
        Evaluation.evaluate(optimalDecisionTreeModel, testData, logMessage=True)
    else:
        randomForestModel = RandomForest.trainModel(trainingData)
        Evaluation.evaluate(randomForestModel, testData, logMessage=True)
        decisionTreeModel = DecisionTree.trainModel(trainingData)
        Evaluation.evaluate(decisionTreeModel, testData, logMessage=True)
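# `buildContext` and `loadData` are project helpers whose definitions are not shown here.
# A minimal sketch of what they might look like, assuming the label sits in the first CSV
# column with numeric features after it; the app name, master, and 70/30 split are
# illustrative assumptions only.
from pyspark import SparkConf, SparkContext
from pyspark.mllib.regression import LabeledPoint

def buildContext():
    conf = SparkConf().setAppName("classification").setMaster("local[*]")
    return SparkContext(conf=conf)

def loadData(sc, basepath, filepath):
    def parse(line):
        parts = [float(x) for x in line.split(',')]
        return LabeledPoint(parts[0], parts[1:])
    data = sc.textFile(basepath + "/" + filepath).map(parse)
    return data.randomSplit([0.7, 0.3])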
def EvalKLDRandomSamp(textList, Niters, pModelCopy, Nsumm):
    KLScore = []
    for iters in range(Niters):
        index = np.random.uniform(0, len(textList), Nsumm)
        indexALLRS = np.array([int(i) for i in index])
        summText = [textList[int(i)] for i in index]
        summModel = getModel(summText, len(pModelCopy.values())).prob
        summModelCopy = summModel.copy()
        # Score the sampled summary against a fresh copy of the source model
        # (Niters is the number of iterations over which the score is obtained)
        KLScore.append(Ev.getKLScore(summModelCopy, pModelCopy.copy()))
        #KLScore.append(EvalKLD(summText, pModelCopy))
    return (KLScore, summModel, summText, indexALLRS)
def trainOptimalModel(trainingData, testData):
    print "\nTraining optimal Decision Tree model started!"
    Utils.logTime()

    impurityVals = ['gini', 'entropy']
    maxDepthVals = [3, 4, 5, 6, 7]
    maxBinsVals = [8, 16, 32]

    optimalModel = None
    optimalMaxDepth = None
    optimalImpurity = None
    optimalBinsVal = None
    minError = None

    try:
        for curImpurity in impurityVals:
            for curMaxDepth in maxDepthVals:
                for curMaxBins in maxBinsVals:
                    model = DecisionTree.trainClassifier(trainingData,
                                                         numClasses=2,
                                                         categoricalFeaturesInfo={},
                                                         impurity=curImpurity,
                                                         maxDepth=curMaxDepth,
                                                         maxBins=curMaxBins)
                    testErr, PR, ROC = Evaluation.evaluate(model, testData)
                    if minError is None or testErr < minError:
                        minError = testErr
                        optimalImpurity = curImpurity
                        optimalMaxDepth = curMaxDepth
                        optimalBinsVal = curMaxBins
                        optimalModel = model
    except:
        msg = "\nException during model training with below parameters:"
        msg += "\timpurity: " + str(curImpurity)
        msg += "\tmaxDepth: " + str(curMaxDepth)
        msg += "\tmaxBins: " + str(curMaxBins)
        Utils.logMessage(msg)

    logMessage(optimalModel, optimalMaxDepth, optimalImpurity, optimalBinsVal, minError)
    return optimalModel
def splitBySignals(dataStep):
    segments = []
    for input, target in dataStep:
        targetInt = np.argmax(Evaluation.addNoGestureSignal(target), 1)
        inds = np.where(targetInt[:-1] != targetInt[1:])[0]
        lastInd = -1
        for ind in inds:
            if targetInt[ind] != np.max(targetInt):
                iSegment = input[lastInd + 1:ind + 1]
                tSegement = target[lastInd + 1:ind + 1]
                tSegement[0, :] = 0
                tSegement[-1, :] = 0
                segments.append((iSegment, tSegement))
                lastInd = ind
        ind = len(targetInt) - 1
        iSegment = input[lastInd + 1:ind + 1]
        tSegement = target[lastInd + 1:ind + 1]
        tSegement[0, :] = 0
        tSegement[-1, :] = 0
        segments.append((iSegment, tSegement))
    return segments
from Components import *

"""
Load data into a numpy matrix.
(1) load is used to read a .npy binary file, which can be generated from a .txt file
(2) genfromtxt generates a numpy array from a raw text file
"""
print "Loading", sys.argv[1], "similarity matrix..."
cost_mat = load(sys.argv[1])
#cost_mat = genfromtxt(data_path + '/cost-matrices/' + sys.argv[1])

"""
Compute the bullseye score, assuming the MPEG-7 data is loaded.
"""
e = Evaluation(cost_mat, 20, 70)
print "Top 40 bullseye score: ", e.bullseye(40)

"""
Compute a new similarity matrix using the dice coefficient as a population cue.
"""
# Geometric mean, to ensure symmetry
cost_mat = sqrt(cost_mat * cost_mat.transpose())
p = Population(cost_mat, 20, 70, verbose=True)
# Not setting k will attempt to find it automatically!
processed_matrix = p.generate_diff(k=13)
e = Evaluation(processed_matrix, 20, 70)
print "Top 40 bullseye score using dice: ", e.bullseye(40)

"""
Update the similarity matrix further using the previous
li_results = []
# 5) As with the training set, loop through the test set to get the individual words
for file_name in testset:
    minimum_neg_log_prob = 1000000000
    min_category = ''
    li = get_list_tokens_nltk(corpus, file_name)
    set_list_words = set([w for w in li if w in word_list])

    ## 6) Get the probability for each category,
    # using the cat_num_docs dictionary to wade through the categories
    for cat in cat_num_docs:
        neg_log_prob = -log(cat_num_docs[cat] / len(trainset))
        for w in word_cat_num_doc_dict:
            if w in set_list_words:
                neg_log_prob -= log(word_cat_num_doc_dict[w][cat])
            else:
                neg_log_prob -= log(1 - word_cat_num_doc_dict[w][cat])
        if minimum_neg_log_prob > neg_log_prob:
            min_category = cat
            minimum_neg_log_prob = neg_log_prob
    li_results.append((file_name, min_category, f2c(corpus, file_name)))

if binary_classification:
    Evaluation.evaluation_binary(li_results)
else:
    Evaluation.evaluation_multi_class(li_results, cat_num_docs.keys())

print "The time taken by the trained classifier to assign labels"
print time.time() - start_time, "seconds"
# Build the probability density function
print "Building the Model"
pModel = {}
#pModel = getModel(textList, Nterms).prob
#pModel = getModel_UD(textList)
pModel = est_probability_UD(textList)
f = open('Model1.txt', 'w')
dump_model(pModel, f)
f.close()

# Get the length model
wordList = pModel.keys()
lenPdf = getlengthpdf(textList, wordList)

# Get the sentiment joint probability
[textSentiDict, sentiDict] = Ev.getSentimentScoreAllText(textList)
#[sentimentProb, textSentiDict, sentiDict] = getSentimentProb(pModel.keys(), textList)
sentimentProb = getSentimentProb_FromEV(pModel.keys(), textList)

# Get the TF matrix for the input tweets
cleanWords = cleanUpWords(pModel.keys())
TFMat = []
for i in range(len(textList)):
    TFMat.append(np.array([0 for col in range(len(pModel.values()))]))
TFvec = np.array([0 for col in range(len(pModel.values()))])
for i in range(len(textList)):
    textWords = createStemmedWordList(textList[i])
    j = 0
def sampling(settings, types_dict, types_dict_c, out, ncounterfactuals, clf, n_batches_train, n_samples_train, k, n_input, degree_active): argvals = settings.split() args = Helpers.getArgs(argvals) # Creating graph sess_HVAE = tf.Graph() with sess_HVAE.as_default(): # args.model_name: excluded tf_nodes = Graph.C_CHVAE_graph( args.types_file, args.types_file_c, learning_rate=1e-3, z_dim=args.dim_latent_z, y_dim=args.dim_latent_y, s_dim=args.dim_latent_s, y_dim_partition=args.dim_latent_y_partition, nsamples=1000, p=2) # start session with tf.Session(graph=sess_HVAE) as session: # Add ops to save and restore all the variables. saver = tf.train.Saver() print('Initizalizing Variables ...') tf.global_variables_initializer().run() # -----------------------------------------------------------------------------------# # Apply on training data print('Training the CHVAE ...') if (args.train == 1): start_time = time.time() # Training cycle loglik_epoch = [] KL_s_epoch = [] KL_z_epoch = [] for epoch in tqdm(range(args.epochs)): avg_loss = 0. avg_KL_s = 0. avg_KL_z = 0. samples_list = [] p_params_list = [] q_params_list = [] log_p_x_total = [] # Annealing of Gumbel-Softmax parameter tau = np.max([1.0 - 0.001 * epoch, 1e-3]) # Randomize the data in the mini-batches train_data = out['training'][1] train_data_c = out['training'][2] random_perm = np.random.permutation( range(np.shape(train_data)[0])) train_data_aux = train_data[random_perm, :] train_data_aux_c = train_data_c[random_perm, :] for i in range(n_batches_train): # Create inputs for the feed_dict data_list = Helpers.next_batch(train_data_aux, types_dict, args.batch_size, index_batch=i) # DONE data_list_c = Helpers.next_batch(train_data_aux_c, types_dict_c, args.batch_size, index_batch=i) # DONE # Create feed dictionary feedDict = { i: d for i, d in zip(tf_nodes['ground_batch'], data_list) } feedDict.update({ i: d for i, d in zip(tf_nodes['ground_batch_c'], data_list_c) }) feedDict[tf_nodes['tau_GS']] = tau feedDict[tf_nodes['batch_size']] = args.batch_size # Running VAE _, X_list, loss, KL_z, KL_s, samples, log_p_x, p_params, q_params = session.run( [ tf_nodes['optim'], tf_nodes['X'], tf_nodes['loss_re'], tf_nodes['KL_z'], tf_nodes['KL_s'], tf_nodes['samples'], tf_nodes['log_p_x'], tf_nodes['p_params'], tf_nodes['q_params'] ], feed_dict=feedDict) # Collect all samples, distirbution parameters and logliks in lists if i == 0: samples_list = [samples] p_params_list = [p_params] q_params_list = [q_params] log_p_x_total = [log_p_x] else: samples_list.append(samples) p_params_list.append(p_params) q_params_list.append(q_params) log_p_x_total.append(log_p_x) # Compute average loss avg_loss += np.mean(loss) avg_KL_s += np.mean(KL_s) avg_KL_z += np.mean(KL_z) # Concatenate samples in arrays s_total, z_total, y_total, est_data = Helpers.samples_concatenation( samples_list) # Transform discrete variables back to the original values train_data_transformed = Helpers.discrete_variables_transformation( train_data_aux[:n_batches_train * args.batch_size, :], types_dict) est_data_transformed = Helpers.discrete_variables_transformation( est_data, types_dict) # Create global dictionary of the distribution parameters p_params_complete = Helpers.p_distribution_params_concatenation( p_params_list, # DONE types_dict, args.dim_latent_z, args.dim_latent_s) q_params_complete = Helpers.q_distribution_params_concatenation( q_params_list, # DONE args.dim_latent_z, args.dim_latent_s) # Compute mean and mode of our loglik models: these correspond to the estimated values 
loglik_mean, loglik_mode = Helpers.statistics( p_params_complete['x'], types_dict) # DONE # Try this for the errors error_train_mean = Helpers.error_computation( train_data_transformed, loglik_mean, types_dict) error_train_mode = Helpers.error_computation( train_data_transformed, loglik_mode, types_dict) error_train_samples = Helpers.error_computation( train_data_transformed, est_data_transformed, types_dict) # Display logs per epoch step if epoch % args.display == 0: print_loss(epoch, start_time, avg_loss / n_batches_train, avg_KL_s / n_batches_train, avg_KL_z / n_batches_train) print("") # Plot evolution of test loglik loglik_per_variable = np.sum(np.concatenate(log_p_x_total, 1), 1) / n_samples_train loglik_epoch.append(loglik_per_variable) # -----------------------------------------------------------------------------------# # Apply on test data for i in range(1): samples_test_list = [] test_params_list = [] log_p_x_test_list = [] data_c_list = [] test_data_counter = out['test_counter'][1] test_data_c_counter = out['test_counter'][2] y_test_counter = out['test_counter'][3] n_samples_test = test_data_counter.shape[0] # Create test minibatch data_list = Helpers.next_batch(test_data_counter, types_dict, n_samples_test, index_batch=i) data_list_c = Helpers.next_batch(test_data_c_counter, types_dict_c, n_samples_test, index_batch=i) # DONE # Constant Gumbel-Softmax parameter (where we have finished the annealing tau = 1e-3 # Create feed dictionary feedDict = { i: d for i, d in zip(tf_nodes['ground_batch'], data_list) } feedDict.update({ i: d for i, d in zip(tf_nodes['ground_batch_c'], data_list_c) }) feedDict[tf_nodes['tau_GS']] = tau feedDict[tf_nodes[ 'batch_size']] = ncounterfactuals # n_samples_test # Get samples from the generator function (computing the mode of all distributions) samples_test, log_p_x_test, test_params, theta_test, normalization_params_test, X, delta_kl = session.run( [ tf_nodes['samples_test'], tf_nodes['log_p_x_test'], tf_nodes['test_params'], tf_nodes['theta_test'], tf_nodes['normalization_params'], tf_nodes['X'], tf_nodes['delta_kl'] ], feed_dict=feedDict) samples_test_list.append(samples_test) test_params_list.append(test_params) log_p_x_test_list.append(log_p_x_test) data_c_list.append(data_list_c) # Concatenate samples in arrays s_total_test, z_total_test, y_total_test, samples_total_test = Helpers.samples_concatenation( samples_test_list) # Transform discrete variables back to the original values est_samples_transformed = Helpers.discrete_variables_transformation( samples_total_test, types_dict) # -----------------------------------------------------------------------------------# # Find k Attainable Counterfactuals print('[*] Find Attainable Counterfactuals...') counter_batch_size = 1 # counterfactual batch size (i.e. 
look for counterfactuals one by one) data_concat = [] data_concat_c = [] counterfactuals = [] latent_tilde = [] latent = [] search_samples = args.search_samples p = args.norm_latent_space for i in tqdm(range(ncounterfactuals)): s = (k, n_input) # preallocate k spots; # inputs sz = (k, args.dim_latent_z) s = np.zeros(s) sz = np.zeros(sz) ik = 0 # counter l = 0 step = args.step_size x_adv, y_adv, z_adv, d_adv = None, None, None, None #scale test observations scaled_test, scaler_test = Helpers.standardize( test_data_counter) # get one test observation data_list = Helpers.next_batch(test_data_counter, types_dict, counter_batch_size, index_batch=i) data_list_c = Helpers.next_batch(test_data_c_counter, types_dict_c, counter_batch_size, index_batch=i) hat_y_test = np.repeat(y_test_counter[i] * 1, search_samples, axis=0) test_data_c_replicated = np.repeat( test_data_c_counter[i, :].reshape(1, -1), search_samples, axis=0) replicated_scaled_test = np.repeat(scaled_test[i, :].reshape( 1, -1), search_samples, axis=0) # get replicated observations (observation replicated nsamples times) #replicated_scaled_test = Helpers.replicate_data_list(data_list_scaled, search_samples) replicated_data_list = Helpers.replicate_data_list( data_list, search_samples) replicated_data_list_c = Helpers.replicate_data_list( data_list_c, search_samples) replicated_z = np.repeat(z_total_test[i].reshape( -1, args.dim_latent_z), search_samples, axis=0) h = l + step # counter to stop count = 0 counter_step = 1 max_step = 500 while True: count = count + counter_step if (count > max_step) == True: sz = None s = None z = z_total_test[i].reshape(-1, args.dim_latent_z) break if degree_active == 1: #choose all latent features for search delta_z = np.random.randn( search_samples, replicated_z.shape[1] ) # http://mathworld.wolfram.com/HyperspherePointPicking.html d = np.random.rand(search_samples) * ( h - l) + l # length range [l, h) norm_p = np.linalg.norm(delta_z, ord=p, axis=1) d_norm = np.divide(d, norm_p).reshape( -1, 1) # rescale/normalize factor delta_z = np.multiply(delta_z, d_norm) z_tilde = replicated_z + delta_z # z tilde else: delta_z = np.random.randn( search_samples, replicated_z.shape[1] ) # http://mathworld.wolfram.com/HyperspherePointPicking.html d = np.random.rand(search_samples) * ( h - l) + l # length range [l, h) norm_p = np.linalg.norm(delta_z, ord=p, axis=1) d_norm = np.divide(d, norm_p).reshape( -1, 1) # rescale/normalize factor delta_z = np.multiply(delta_z, d_norm) mask = np.tile( delta_kl[3][0, :] * 1, (search_samples, 1)) # only alter most important latent features delta_z = np.multiply(delta_z, mask) z_tilde = replicated_z + delta_z # create feed dictionary feedDict = { i: d for i, d in zip(tf_nodes['ground_batch'], replicated_data_list) } feedDict.update({ i: d for i, d in zip(tf_nodes['ground_batch_c'], replicated_data_list_c) }) feedDict[tf_nodes['samples_z']] = z_tilde feedDict[tf_nodes['tau_GS']] = tau feedDict[tf_nodes['batch_size']] = search_samples theta_perturbed, samples_perturbed = session.run( [ tf_nodes['theta_perturbed'], tf_nodes['samples_perturbed'] ], feed_dict=feedDict) x_tilde, params_x_perturbed = Evaluation.loglik_evaluation_test( X_list, theta_perturbed, normalization_params_test, types_dict) x_tilde = np.concatenate(x_tilde, axis=1) scaled_tilde = scaler_test.transform(x_tilde) d_scale = np.sum(np.abs(scaled_tilde - replicated_scaled_test), axis=1) x_tilde = np.c_[test_data_c_replicated, x_tilde] y_tilde = clf.predict(x_tilde) indices_adv = np.where(y_tilde == 0)[0] if 
len(indices_adv) == 0: # no candidate generated l = h h = l + step elif all(s[k - 1, :] == 0): # not k candidates generated indx = indices_adv[np.argmin(d_scale[indices_adv])] assert (y_tilde[indx] != 1) s[ik, :] = x_tilde[indx, :] sz[ik, :] = z_tilde[indx, :] z = z_total_test[i].reshape(-1, args.dim_latent_z) ik = ik + 1 # up the count l = h h = l + step else: # k candidates genereated break data_concat.append(np.concatenate(data_list, axis=1)) data_concat_c.append(np.concatenate(data_list_c, axis=1)) counterfactuals.append(s) latent_tilde.append(sz) latent.append(z) cchvae_counterfactuals = np.array(counterfactuals) return cchvae_counterfactuals
def test_recall(y_pred):
    y_true = [0, 1]
    expect = recall_score(y_true, y_pred)
    assert ev.recall(y_true, y_pred) == expect
print("----------------------------------------------------------------------") print("----------------------------------------------------------------------") print("") print( f"Recommended songs:\n{personal_recommendations[:top_hits].drop(['user_id', 'score', 'rank'], axis = 1)}" ) print("----------------------------------------------------------------------") print(" \n Starting error evaluation\n ") #Evaluation start = time.time() #Define what percentage of users to use for precision recall calculation user_sample = 0.05 #Instantiate the precision_recall_calculator class pr = Evaluation.precision_recall_calculator(test_data, train_data, pm, is_model) #Call method to calculate precision and recall values (pm_avg_precision_list, pm_avg_recall_list, ism_avg_precision_list, ism_avg_recall_list) = pr.calculate_measures(user_sample) end = time.time() print(end - start) #Visually display the precision and recall quality of both models print(" \nPlotting precision recall curves.") plot_precision_recall(pm_avg_precision_list, pm_avg_recall_list, "popularity_model", ism_avg_precision_list, ism_avg_recall_list, "item_similarity_model")
def Model(Label,Parameters=[]): global filepath, filename, fixed_seed_num, sequence_window, number_class, hidden_units, input_dim, learning_rate, epoch, is_multi_scale, training_level, cross_cv, is_add_noise, noise_ratio try: filepath = Parameters["filepath"] filename = Parameters["filename"] sequence_window = Parameters["sequence_window"] number_class = Parameters["number_class"] hidden_units = Parameters["hidden_units"] input_dim = Parameters["input_dim"] learning_rate = Parameters["learning_rate"] epoch = Parameters["epoch"] is_multi_scale = Parameters["is_multi_scale"] training_level = Parameters["training_level"] cross_cv = Parameters["cross_cv"] fixed_seed_num = Parameters["fixed_seed_num"] is_add_noise = Parameters["is_add_noise"] noise_ratio = Parameters["noise_ratio"] except: pass result_list_dict = defaultdict(list) evaluation_list = ["ACCURACY","F1_SCORE","AUC","G_MEAN"] for each in evaluation_list: result_list_dict[each] = [] np.random.seed(fixed_seed_num) # for reproducibility #num_selected_features = 30 #num_selected_features = 25#AS leak tab=0 #num_selected_features = 32#Slammer tab=0 num_selected_features = 33#Nimda tab=1 for tab_cv in range(cross_cv): if not tab_cv == 0 :continue epoch_training_loss_list = [] epoch_val_loss_list = [] #print(is_multi_scale) #using MLP to train if Label == "SVM": x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename, sequence_window, tab_cv, cross_cv, Multi_Scale=is_multi_scale, Wave_Let_Scale=training_level, Normalize=0) print(Label+" is running..............................................") y_train = y_train0 clf = svm.SVC(kernel="rbf", gamma=0.00001, C=100000,probability=True) print(x_train.shape) clf.fit(x_train, y_train) result = clf.predict_proba(x_test) #return Evaluation.Evaluation(y_test, result) #results = Evaluation.Evaluation(y_test, result) elif Label == "SVMF": x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename, sequence_window, tab_cv, cross_cv, Multi_Scale=is_multi_scale, Wave_Let_Scale=training_level, Normalize=5) print(Label+" is running..............................................") clf = svm.SVC(kernel="rbf", gamma=0.00001, C=100000,probability=True) print(x_train.shape) #x_train_new = SelectKBest(f_classif, k=num_selected_features).fit_transform(x_train, y_train0) #x_test_new = SelectKBest(f_classif, k=num_selected_features).fit_transform(x_test, y_test0) clf.fit(x_train, y_train0) result = clf.predict_proba(x_test) #return Evaluation.Evaluation(y_test, result) #results = Evaluation.Evaluation(y_test, result) elif Label == "SVMW": x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename, sequence_window, tab_cv, cross_cv, Multi_Scale=is_multi_scale, Wave_Let_Scale=training_level, Normalize=6) print(Label + " is running..............................................") #SVR(kernel="linear") = svm.SVC(kernel="rbf", gamma=0.00001, C=100000, probability=True) estimator = svm.SVC(kernel="linear",probability=True) selector = RFE(estimator, num_selected_features, step=1) selector = selector.fit(x_train, y_train0) result = selector.predict_proba(x_test) # return Evaluation.Evaluation(y_test, result) # results = Evaluation.Evaluation(y_test, result) elif Label == "NBF": x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename, sequence_window, tab_cv, 
cross_cv, Multi_Scale=is_multi_scale, Wave_Let_Scale=training_level, Normalize=10) print(Label + " is running..............................................") clf = MultinomialNB() clf.fit(x_train, y_train0) result = clf.predict_proba(x_test) elif Label == "NBW": x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename, sequence_window, tab_cv, cross_cv, Multi_Scale=is_multi_scale, Wave_Let_Scale=training_level, Normalize=11) print(Label + " is running..............................................") #SVR(kernel="linear") = svm.SVC(kernel="rbf", gamma=0.00001, C=100000, probability=True) estimator = MultinomialNB() selector = RFE(estimator, num_selected_features, step=1) selector = selector.fit(x_train, y_train0) result = selector.predict_proba(x_test) # return Evaluation.Evaluation(y_test, result) # results = Evaluation.Evaluation(y_test, result) elif Label == "NB": x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename, sequence_window, tab_cv, cross_cv, Multi_Scale=is_multi_scale, Wave_Let_Scale=training_level, Normalize=1) print(Label+" is running..............................................") y_train = y_train0 clf = MultinomialNB() clf.fit(x_train, y_train) result = clf.predict_proba(x_test) #return Evaluation.Evaluation(y_test, result) #results = Evaluation.Evaluation(y_test, result) elif Label == "DT": x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename, sequence_window, tab_cv, cross_cv, Multi_Scale=is_multi_scale, Wave_Let_Scale=training_level, Normalize=2) print(Label+" is running.............................................."+str(x_train.shape)) y_train = y_train0 clf = tree.DecisionTreeClassifier() clf.fit(x_train, y_train) result = clf.predict_proba(x_test) #return Evaluation.Evaluation(y_test, result) #results = Evaluation.Evaluation(y_test, result) elif Label == "Ada.Boost": x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename, sequence_window, tab_cv, cross_cv, Multi_Scale=is_multi_scale, Wave_Let_Scale=training_level, Normalize=0) print(Label+" is running.............................................."+str(x_train.shape)) y_train = y_train0 #clf = AdaBoostClassifier(n_estimators=10) #Nimda tab=1 clf = AdaBoostClassifier(n_estimators=10) clf.fit(x_train, y_train) result = clf.predict_proba(x_test) #return Evaluation.Evaluation(y_test, result) #results = Evaluation.Evaluation(y_test, result) elif Label == "MLP": x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename, sequence_window, tab_cv, cross_cv, Multi_Scale=is_multi_scale, Wave_Let_Scale=training_level, Normalize=0) print(Label+" is running..............................................") batch_size = len(y_train) start = time.clock() model = Sequential() model.add(Dense(hidden_units, activation="relu", input_dim=33)) model.add(Dense(output_dim=number_class)) model.add(Activation("sigmoid")) # model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) model.fit(x_train, y_train, batch_size=batch_size, nb_epoch=epoch) #result = model.predict(X_Testing, batch_size=batch_size) result = model.predict(x_test) end = time.clock() print("The Time For MLP is " + str(end - start)) 
#return Evaluation.Evaluation(y_test, result) #results = Evaluation.Evaluation(y_test, result) #elif Label == "SVM-S": #x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData('Attention',filepath,filename,sequence_window,tab_cv,cross_cv) #x_train,y_train = Manipulation(x_train,y_train0,sequence_window) #x_test, y_test = Manipulation(x_test, y_test0, sequence_window) #clf = svm.SVC(kernel="rbf") #clf.fit(x_train, y_train) #result = clf.predict(x_test) #results = Evaluation.Evaluation_WithoutS(y_test, result) elif Label == "RNN": print(Label+" is running..............................................") start = time.clock() x_train_multi_list, x_train, y_train, x_testing_multi_list, x_test, y_test = LoadData.GetData(is_add_noise,noise_ratio,'Attention', filepath, filename, sequence_window, tab_cv, cross_cv, Multi_Scale=is_multi_scale, Wave_Let_Scale=training_level) batch_size = len(y_train) rnn_object = SimpleRNN(hidden_units, input_length=len(x_train[0]), input_dim=input_dim) model = Sequential() model.add(rnn_object) # X.shape is (samples, timesteps, dimension) #model.add(Dense(30, activation="relu")) #model.add(Dropout(0.2)) model.add(Dense(30, activation="sigmoid")) #model.add(Dropout(0.3)) # model.add(Dense(5,activation="tanh")) model.add(Dense(output_dim=number_class)) model.add(Activation("sigmoid")) # model.add(Activation("softmax")) # model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy']) model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) model.fit(x_train, y_train, batch_size=batch_size, nb_epoch=epoch) #result = model.predict(X_Testing, batch_size=batch_size) result = model.predict(x_test) #return Evaluation.Evaluation(y_test, result) #results = Evaluation.Evaluation(y_test, result) end = time.clock() print("The Time For RNN is " + str(end - start)) # print(result) elif Label == "LSTM": print(Label+" is running..............................................") start = time.clock() x_train_multi_list, x_train, y_train, x_testing_multi_list, x_test, y_test = LoadData.GetData(is_add_noise,noise_ratio,'Attention',filepath, filename, sequence_window, tab_cv, cross_cv, Multi_Scale=is_multi_scale, Wave_Let_Scale=training_level) batch_size = len(y_train) lstm_object = LSTM(hidden_units, input_length=len(x_train[0]), input_dim=input_dim) model = Sequential() model.add(lstm_object) # X.shape is (samples, timesteps, dimension) # model.add(LSTM(lstm_size,return_sequences=True,input_shape=(len(X_Training[0]),33))) # model.add(LSTM(100,return_sequences=True)) # model.add(Dense(10, activation="tanh")) # model.add(Dense(5,activation="tanh")) model.add(Dense(30, activation="relu")) #model.add(Dropout(0.2)) #model.add(Dense(30, activation="sigmoid")) #model.add(Dropout(0.3)) # model.add(Dense(5,activation="tanh")) model.add(Dense(output_dim=number_class)) model.add(Activation("sigmoid")) #model.add(Activation("softmax")) # model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy']) model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) model.fit(x_train, y_train, batch_size=batch_size, nb_epoch=epoch) #result = model.predict(X_Testing, batch_size=batch_size) result = model.predict(x_test) end = time.clock() print("The Time For LSTM is " + str(end - start)) if len(Parameters) > 0: return Evaluation.Evaluation(y_test, result)#Plotting AUC results = Evaluation.Evaluation(y_test, result)# Computing ACCURACY,F1-score,..,etc print(results) y_test2 = 
np.array(Evaluation.ReverseEncoder(y_test)) result2 = np.array(Evaluation.ReverseEncoder(result)) print("---------------------------1111111111111111") with open("StatFalseAlarm_"+filename+"_True.txt","w") as fout: for tab in range(len(y_test2)): fout.write(str(int(y_test2[tab]))+'\n') with open("StatFalseAlarm_"+filename+"_"+Label+"_"+"_Predict.txt","w") as fout: for tab in range(len(result2)): fout.write(str(int(result2[tab]))+'\n') print(result2.shape) print("---------------------------22222222222222222") for each_eval, each_result in results.items(): result_list_dict[each_eval].append(each_result) for eachk, eachv in result_list_dict.items(): result_list_dict[eachk] = np.average(eachv) #print(result_list_dict) if is_add_noise == False: with open(os.path.join(os.getcwd(),"Comparison_Log_"+filename+".txt"),"a")as fout: outfileline = Label+":__" fout.write(outfileline) for eachk,eachv in result_list_dict.items(): fout.write(eachk+": "+str(round(eachv,3))+",\t") fout.write('\n') else: with open(os.path.join(os.getcwd(),"Comparison_Log_Adding_Noise_"+filename+".txt"),"a")as fout: outfileline = Label+":__"+"Noise_Ratio_:"+str(noise_ratio) fout.write(outfileline) for eachk,eachv in result_list_dict.items(): fout.write(eachk+": "+str(round(eachv,3))+",\t") fout.write('\n') return results
f.close()

eva_start_time = time.time()
ini = Initialization(dataset_name, product_name)
iniW = IniWallet(dataset_name, product_name, wallet_distribution_type)

seed_cost_dict = ini.constructSeedCostDict()
graph_dict = ini.constructGraphDict(cascade_model)
product_list = ini.constructProductList()
num_product = len(product_list)
wallet_dict = iniW.constructWalletDict()

eva = Evaluation(graph_dict, product_list, ppp_strategy, True)
personal_prob_dict = eva.setPersonalPurchasingProbDict(wallet_dict)

print('@ ' + model_name + ' evaluation @ dataset_name = ' + dataset_name + '_' + cascade_model +
      ', product_name = ' + product_name + ', wd = ' + wallet_distribution_type + ', ppp = ' + ppp_strategy)

result10_pro_list = []
for _ in range(100):
    pro, pro_k_list, pnn_k_list = eva.getSeedSetProfit(seed_set, copy.deepcopy(wallet_dict),
                                                       copy.deepcopy(personal_prob_dict))
opt = Oger.evaluation.Optimizer(gridsearch_parameters, evaluationFunction)

#===========================================================================
# Comment out the following two lines to fall back to single-process training.
#===========================================================================
opt.scheduler = mdp.parallel.ProcessScheduler(n_processes=2, verbose=False)
mdp.activate_extension("parallel")

# Start the grid search using nFolds and cross-validation
opt.grid_search(data, flow, n_folds=nFolds, cross_validate_function=Oger.evaluation.n_fold_random)

# Plot minimum errors
Evaluation.plotMinErrors(opt.errors, opt.parameters, opt.parameter_ranges, pp)

# Plot the error space along three axes
i = 0
axisOne = -1
axisTwo = -1
axisThree = -1
for node, param in opt.parameters:
    if param == 'spectral_radius':
        axisOne = i
    elif param == 'leak_rate':
        axisTwo = i
    elif param == 'ridge_param':
        axisThree = i
    i = i + 1
myNet20 = net.Dense()
myNet20.AddLayer(5, isInput=True)
myNet20.AddLayer(20, activationFunction=af.Sigmoid)
myNet20.AddLayer(1, activationFunction=af.Sigmoid)

start_time = time.time()
myNet5.Train(dataset, target, iterationCount=10, learningRateStart=.6, learningRateEnd=.0, regulationRate=.02)
train5_Time = time.time() - start_time

start_time = time.time()
myNet20.Train(dataset, target, iterationCount=10, learningRateStart=.6, learningRateEnd=.0, regulationRate=.02)
train20_Time = time.time() - start_time

# **************************************** Print Metrics *************************************************************
from sklearn.metrics import mean_squared_error

t5, o5 = ev.GetPredictions(myNet5, test_dataset, test_target)
t20, o20 = ev.GetPredictions(myNet20, test_dataset, test_target)

print(" ")
print("---------------------------------------------------------------------------------")
print("-------------------------------- airfoil_self_noise -----------------------------")
print(">>>>>>>>>>>>> 5 hidden units")
print("mean_squared_error : {}".format(round(mean_squared_error(t5, o5), 6)))
print("Root_MSE           : {}".format(round(math.sqrt(mean_squared_error(t5, o5)), 6)))
print("Training Time      : {} Seconds".format(round(train5_Time, 3)))
print("- - - - - - - - - - - - - - - - - - - - - - - - - - -")
print(">>>>>>>>>>>>> 20 hidden units")
print("mean_squared_error : {}".format(round(mean_squared_error(t20, o20), 6)))
print("Root_MSE           : {}".format(round(math.sqrt(mean_squared_error(t20, o20)), 6)))
print("Training Time      : {} Seconds".format(round(train20_Time, 3)))
def __init__(self, generate_prompts):
    configs = []
    if not Settings.config.has_option("DEFAULT", "domains"):
        logger.error("You must specify the domains under the DEFAULT section of the config")
    domains = Settings.config.get("DEFAULT", 'domains')
    logger.info('--Simulating dialogues over the domains: ', domains)
    self.possible_domains = domains.split(',')
    DomainUtils.checkDomainStrings(domainStrings=self.possible_domains)

    self.maxTurns = 30
    if Settings.config.has_option("simulate", "maxturns"):
        configs.append('maxturns')
        self.maxTurns = Settings.config.getint("simulate", "maxturns")
    self.forceNullPositive = False
    if Settings.config.has_option("simulate", "forcenullpositive"):
        configs.append('forcenullpositive')
        self.forceNullPositive = Settings.config.getboolean("simulate", "forcenullpositive")
    conf_scorer_name = 'additive'
    if Settings.config.has_option('simulate', 'confscorer'):
        conf_scorer_name = Settings.config.get('simulate', 'confscorer')

    if Settings.config.has_section('simulate'):
        for opt in Settings.config.options('simulate'):
            if opt not in configs and opt not in Settings.config.defaults():
                logger.error('Invalid config: ' + opt)

    # [MultiDomain?] Dialogue Management/policy.
    #-----------------------------------------
    self.topic_manager = TopicManager.TopicManager()

    # Simulated User.
    #-----------------------------------------
    # TODO - deal with multi-domain simulation - whilst changing Settings.py, I'll just pass the domain here for now
    logger.debug('simulate.py -- XXXXXXX -- directly passing domain name in simulate at present...')
    self.simulator = UserSimulator.SimulatedUsersManager(domainStrings=self.possible_domains)

    # Error Simulator.
    #-----------------------------------------
    # TODO - it is a hack for now to pass the domain string directly from config via self.possible_domains. Look at this.
    #self.errorSimulator = ErrorSimulator.CuedErrorSimulator(conf_scorer_name, domainString=self.possible_domains[0])
    self.errorSimulator = ErrorSimulator.SimulatedErrorManager(conf_scorer_name, self.possible_domains)

    # SemO.
    #-----------------------------------------
    self.semoClass = None
    if generate_prompts:
        self.semo_name = 'PassthroughSemO'
        if Settings.config.has_option('hub', 'semo'):
            self.semo_name = Settings.config.get('hub', 'semo')
        # SemO.
        if self.semo_name == 'PassthroughSemO':
            self.semoClass = SemO.PassthroughSemO()
        elif self.semo_name == 'BasicSemO':
            self.semoClass = SemO.BasicSemO()
        else:
            logger.warning('Invalid SemO: %s. Using PassthroughSemO.' % self.semo_name)
            self.semoClass = SemO.PassthroughSemO()

    # Evaluation Manager.
    #-----------------------------------------
    self.evaluator = Evaluation.EvaluationManager(self.possible_domains)
from Evaluation import *
from DictRelevant import *
import time

startPoint = [".A", ".T", ".W", ".B"]
fileDoc = "cisi/CISI.ALL"
fileStopList = "cacm/common_words"
fileQuerry = "cisi/CISI.QRY"
fileRel = 'cisi/CISI.REL'
sim_func_dict = {1: 'product', 2: 'product_log', 3: 'cosinus', 4: 'cosinus_log',
                 5: 'langue', 6: 'langue_corr', 7: 'BM25'}
ind_sim_func = 4

if __name__ == "__main__":
    db = DictBase(startPoint)
    db.execute(fileDoc, fileStopList)
    # print db.word_dict['recognition']  # 2634 OK
    # print db.tf_mat[2634]  # OK
    '''
    {396: 1, 653: 1, 1426: 1, 1044: 1, 89: 1, 474: 2, 927: 1, 1458: 2, 797: 2, 48: 1, 1202: 2, 94: 1,
     568: 2, 1337: 1, 799: 1, 1341: 2, 908: 1, 1421: 1, 601: 1, 858: 1, 861: 1, 990: 1, 863: 1, 102: 2,
     108: 1, 1134: 1, 495: 1, 241: 1, 890: 3, 1403: 1, 895: 1}
    '''
    qr = Querry(startPoint, db.word_dict)
    qr.readQuerry(fileQuerry)
    sim = similarity(db, qr.querry_dict, ind_sim_func)
    dr = DictRelevant()
    dr.readRelevantQuerry(fileRel)
    print "finish readRel Dictionary"
    sim_func = sim_func_dict[ind_sim_func]
    eva = Evaluation(db, dr, -1, getattr(sim, sim_func))
    #eva = Evaluation(db, dr, 100, sim.cosinus)
    print eva.mean_avg_prec()
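# A rough sketch of the mean-average-precision computation that eva.mean_avg_prec() is
# assumed to perform: average precision per query over its ranked results, then the mean
# across queries. The data layout (ranked doc ids per query, sets of relevant ids) is an
# assumption for illustration, not the project's actual structures.
def mean_avg_prec_sketch(ranked_docs_per_query, relevant_per_query):
    ap_values = []
    for qid, ranking in ranked_docs_per_query.items():
        relevant = relevant_per_query.get(qid, set())
        if not relevant:
            continue
        hits, precisions = 0, []
        for rank, doc_id in enumerate(ranking, start=1):
            if doc_id in relevant:
                hits += 1
                precisions.append(float(hits) / rank)
        ap_values.append(sum(precisions) / len(relevant))
    return sum(ap_values) / len(ap_values) if ap_values else 0.0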
also use ALT+SHIFT+E to execute single lines or whole code fragments.
I also recommend the PyCharm cell mode plugin for easier execution of code fragments. (Noam)
"""

## The cell separator
from BabelfyTester import BabelfyTester
from DbWrapper import *
from Evaluation import *
from ModelTrainer import *

##
path = ".."
print "Loading iterators+db cache..."
if not os.path.isdir(path):
    path = "C:\\Users\\Noam\\Documents\\GitHub\\DeepProject"

wikiDB = WikipediaDbWrapper(user='******', password='******', database='wiki20151002')
wikiDB.cacheArticleTable()
iter_eval = WikilinksNewIterator(path + "/data/wikilinks/small/evaluation")
babelfy_model = BabelfyTester(wikiDB, path + "/data/wikilinks/babelfy")
evaluation = Evaluation(iter_eval, babelfy_model)
try:
    evaluation.evaluate()
except:
    print "nothing to do"
babelfy_model.finalizeWriter()
    train_df = pd.read_csv('data_2/u1.base', header=-1)
    train_df.columns = ['userId', 'movieId', 'rating', 'time']
    train_df.drop('time', axis=1, inplace=True)

    test_df = pd.read_csv('data_2/u1.test', header=-1)
    test_df.columns = ['userId', 'movieId', 'rating', 'time']
    test_df.drop('time', axis=1, inplace=True)
elif dataset == '1M':
    train_df = pd.read_csv('data_2/train.csv', header=0)
    train_df.columns = ['userId', 'movieId', 'rating']
    test_df = pd.read_csv('data_2/test.csv', header=0)
    test_df.columns = ['userId', 'movieId', 'rating']

# Ranking (Precision-Recall)
prec, recall = Evaluation.precision_recall(user_recs, train_df.values.tolist(), test_df.values.tolist(), at_top_n)
print 'Precision = %.4f\nRecall = %.4f' % (prec, recall)

# Rating Prediction
preds, actuals = recommender.predict_test(user_knn_model, test_df.values.tolist())
rmse = Evaluation.rmse(preds, actuals)
print 'User kNN rmse', rmse

# preds, actuals = recommender.predict_test(item_knn_model, test_df.values.tolist())
# rmse = Evaluation.rmse(preds, actuals)
# print 'Item kNN rmse', rmse

import numpy as np
print 'mean predictions', np.mean(preds)
print 'mean actual', np.mean(actuals)
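# A small sketch of the root-mean-squared error that Evaluation.rmse is assumed to compute
# over the predicted and actual rating lists; the function name and list layout here are
# assumptions for illustration.
import numpy as np

def rmse_sketch(preds, actuals):
    preds, actuals = np.asarray(preds, dtype=float), np.asarray(actuals, dtype=float)
    return float(np.sqrt(np.mean((preds - actuals) ** 2)))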
def test_accuracy(y_pred):
    y_true = [0, 1]
    expect = accuracy_score(y_true, y_pred)
    assert ev.accuracy(y_true, y_pred) == expect
WNS.simulationModel.nodes = [varp] + WNS.simulationModel.nodes

vdhcp = VirtualDHCPServer("vDHCP@", "theOnlySubnet", "192.168.0.2", "192.168.254.253", "255.255.0.0")
WNS.simulationModel.nodes.append(vdhcp)

vdns = VirtualDNSServer("vDNS", "ip.DEFAULT.GLOBAL")
WNS.simulationModel.nodes.append(vdns)

# Configure probes for evaluation
Evaluation.installEvaluation(sim=WNS,
                             loggingStations=range(1, configuration.numberOfStations + 1),
                             dll=WNS.simulationModel.nodes[1].dll,
                             maxPacketDelay=0.5,       # s
                             maxPacketSize=2000 * 8,   # Bit
                             maxBitThroughput=10E6,    # Bit/s
                             maxPacketThroughput=1E6,  # Packets/s
                             delayResolution=1000,
                             sizeResolution=2000,
                             throughputResolution=10000)

node = openwns.evaluation.createSourceNode(WNS, "glue.phyTrace")
node.getLeafs().appendChildren(
    openwns.evaluation.JSONTrace(key="__json__", description="JSON testing in PhyUser"))

#openwns.evaluation.default.installEvaluation(sim = WNS)

openwns.setSimulator(WNS)
parser.add_argument('--Style', '-sty', type=str, default='Full',
                    help='Style for training the network [default: Full] [options: Plain, Full]')
parser.add_argument('--Network', '-net', type=str, default='DGCNN',
                    help='Network used for training [default: DGCNN] [options: DGCNN, PointNet++ (not supported yet)]')
args = parser.parse_args()

##### Set specified GPU to be active
if args.GPU != -1:
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.GPU)

##### Load Training/Testing Data
Loader = IO.ShapeNetIO('./Dataset/ShapeNet', batchsize=args.batchsize)
Loader.LoadTrainValFiles()

##### Evaluation Object
Eval = Evaluation.Eval()

## Number of categories
PartNum = Loader.NUM_PART_CATS
output_dim = PartNum
ShapeCatNum = Loader.NUM_CATEGORIES

#### Export results directories
if args.ExpRslt:
    dt = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")  # get current time
    BASE_PATH = os.path.expanduser('./Results/ShapeNet/{}_sty-{}_m-{}_{}'.format(args.Network, args.Style, args.m, dt))
    SUMMARY_PATH = os.path.join(BASE_PATH, 'Summary')
    PRED_PATH = os.path.join(BASE_PATH, 'Prediction')
    CHECKPOINT_PATH = os.path.join(BASE_PATH, 'Checkpoint')

    if not os.path.exists(BASE_PATH):
    # creating a csv writer object
    csvwriter = csv.writer(csvfile)

    # writing the fields
    csvwriter.writerow(fields)

    # writing the data rows
    csvwriter.writerows(rows)
    return None


if __name__ == '__main__':
    # arr = {1:2,3:4,5:1}
    # print(max(arr.values()))
    preProcess = PreProcess.PreProcess()
    eval = Evaluation.Evaluation()
    # for n in range(40, 60, 2):
    kmeans = Kmaens.Kmeans(n, preProcess.vectorize_tf_idf())
    print(eval.purity(n, kmeans.y, preProcess.labels))

    # data_vectors_tf_idf = preProcess.vectorize_tf_idf()
    # data_vectors_wv = preProcess.word2wec()
    # optimal_n = len(set(preProcess.labels))

    # # Gaussian Mixture Model
    # print("Gaussian Mixture Model(tf-idf):")
    # gmm = GMMCluster.GMMCluster(data_vectors_tf_idf[:100], 5)
    # cluster = gmm.cluster("tf-idf")
    # print("ARI= ", eval.adjusted_rand_index(preProcess.labels[:100], cluster))
    # print("NMI= ", eval.normalized_mutual_information(preProcess.labels[:100], cluster))
        f.write(str(seed))
        f.write('\n')

    def load_seeds(self, seeds_path):
        seeds = []
        with open(seeds_path, 'r') as f:
            # one seed per line, mirroring the save format above
            for seed in f.read().splitlines():
                seeds.append(seed)
        return seeds

    def draw_graph(self):
        pos = nx.spring_layout(self.graph)
        edge_labels = dict([((u, v,), d['weight'])
                            for u, v, d in self.graph.edges(data=True)])
        nx.draw_networkx_edge_labels(self.graph, pos, edge_labels=edge_labels)
        nx.draw(self.graph, pos=pos, node_size=100, arrows=True)
        plt.show()


if __name__ == '__main__':
    excofim = extended_CoFIM('../weighted_directed_nets/network.dat',
                             '../weighted_directed_nets/community.dat', 0, 1000)
    seeds = excofim.node_expansion(50, 3)
    print seeds
    inf = Evaluation.monte_carlo_extend2(excofim, list(seeds), num_simu=100)
    print "Total influence:", inf
    # excofim.draw_graph()
def test_precision(y_pred):
    y_true = [0, 1]
    expect = precision_score(y_true, y_pred)
    assert ev.precision(y_true, y_pred) == expect
def test_confusion_matrix_multi(y_pred):
    y_true = [0, 1, 2]
    expect = confusion_matrix(y_true, y_pred, labels=y_true).flatten().tolist()
    assert ev.confusion_matrix(y_true, y_pred, lbl=y_true) == expect
        ###--------------------DEBUG STATEMENTS----------------------
        #print cat, cat_num_docs[cat]/len(trainset)
        ###--------------------DEBUG STATEMENTS----------------------
        neg_log_prob = -log(cat_num_docs[cat] / len(trainset))
        word_dict = cat_word_dict[cat]
        count_cat = cat_word_count_dict[cat]
        for w in list_words:
            count_word_train = word_dict.get(w, 0)
            ratio = (count_word_train + 1) / (count_cat + vocab_length)
            neg_log_prob -= log(ratio)
        if minimum_neg_log_prob > neg_log_prob:
            min_category = cat
            minimum_neg_log_prob = neg_log_prob
    li_results.append((file_name, min_category, f2c(corpus, file_name)))

###--------------------DEBUG STATEMENTS----------------------
#for t in li_results:
#    print t
###--------------------DEBUG STATEMENTS----------------------

if binary_classification:
    Evaluation.evaluation_binary(li_results)
else:
    Evaluation.evaluation_multi_class(li_results, cat_num_docs.keys())

print "The time taken by the trained classifier to assign labels"
print time.time() - start_time, "seconds"
def Evaluation_task(self):
    # Evaluation Code
    evaluate = Evaluation.Evaluation()
    evaluate.create_files()
    K = env.nbArms
    policies = [UCB(K, trunc), UCBV(K, trunc), klUCB(K, trunc),
                klUCB(K, klucb=klucbPoisson), KLempUCB(K, trunc)]
else:
    # Third scenario: truncated exponential distributions
    trunc = 10
    env = MAB([Exponential(1. / p, trunc) for p in range(1, 6)])
    K = env.nbArms
    policies = [UCB(K, trunc), UCBV(K, trunc), klUCB(K, trunc),
                klUCB(K, klucb=klucbExp), KLempUCB(K, trunc)]

tsav = int_(linspace(100, horizon - 1, 200))

if graphic == 'yes':
    figure(1)

k = 0
for policy in policies:
    ev = Evaluation(env, policy, nbRep, horizon, tsav)
    print ev.meanReward()
    print ev.meanNbDraws()
    meanRegret = ev.meanRegret()
    if graphic == 'yes':
        semilogx(1 + tsav, meanRegret, color=colors[k])
        xlabel('Time')
        ylabel('Regret')
    k = k + 1

if graphic == 'yes':
    legend([policy.__class__.__name__ for policy in policies], loc=0)
    title('Average regret for various policies')
    show()
def test_f1(y_pred):
    y_true = [0, 1]
    expect = f1_score(y_true, y_pred)
    assert ev.f1(y_true, y_pred) == expect
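# The `ev` module exercised by the metric tests above is not shown here; a minimal sketch
# of what it is assumed to look like, simply delegating to sklearn.metrics so that the
# assertions hold. The flattened confusion-matrix return mirrors test_confusion_matrix_multi.
from sklearn import metrics as skm

def accuracy(y_true, y_pred):
    return skm.accuracy_score(y_true, y_pred)

def precision(y_true, y_pred):
    return skm.precision_score(y_true, y_pred)

def recall(y_true, y_pred):
    return skm.recall_score(y_true, y_pred)

def f1(y_true, y_pred):
    return skm.f1_score(y_true, y_pred)

def confusion_matrix(y_true, y_pred, lbl=None):
    # Returned as a flat list so it can be compared directly in the tests
    return skm.confusion_matrix(y_true, y_pred, labels=lbl).flatten().tolist()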
    maxhumid = condprob(day.humidMax, noneVars[5], noneMeans[5])
    minhumid = condprob(day.humidMin, noneVars[6], noneMeans[6])
    pressure = condprob(day.pressure, noneVars[7], noneMeans[7])
    meanwind = condprob(day.meanWindSpeed, noneVars[8], noneMeans[8])
    maxwind = condprob(day.maxWindSpeed, noneVars[9], noneMeans[9])
    maxgust = condprob(day.maxGustSpeed, noneVars[10], noneMeans[10])
    visibility = condprob(day.visibility, noneVars[11], noneMeans[11])

    # Calculate the entire probability for a "None" day
    noneprobability = (priornone * meantemp * maxtemp * mintemp * dewpoint * meanhumid * maxhumid *
                       minhumid * pressure * meanwind * maxwind * maxgust * visibility)

    # Take the max of the probabilities - the highest one is the prediction
    if max(noneprobability, rainprobability, snowprobability, fogprobability) == noneprobability:
        predictions.append("None")
    elif max(noneprobability, rainprobability, snowprobability, fogprobability) == rainprobability:
        predictions.append("Rain")
    elif max(noneprobability, rainprobability, snowprobability, fogprobability) == snowprobability:
        predictions.append("Snow")
    elif max(noneprobability, rainprobability, snowprobability, fogprobability) == fogprobability:
        predictions.append("Fog")

### End of testing ###

# Output
print "\nPrediction accuracy = %d%%\n" % Evaluation.evaluate(testingData, predictions)
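# `condprob` and Evaluation.evaluate are defined elsewhere; a plausible sketch under the
# assumption that condprob is a Gaussian class-conditional likelihood for one feature and
# that evaluate returns the percentage of test days whose predicted label matches the
# observed label (the day.weather attribute name is hypothetical).
import math

def condprob_sketch(x, variance, mean):
    # Gaussian probability density of x given the per-class mean and variance
    return (1.0 / math.sqrt(2 * math.pi * variance)) * math.exp(-((x - mean) ** 2) / (2 * variance))

def evaluate_sketch(testingData, predictions):
    correct = sum(1 for day, pred in zip(testingData, predictions) if day.weather == pred)
    return 100.0 * correct / len(predictions)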
def Model(each_case,Label,Parameters=[]): global filepath, filename, fixed_seed_num, sequence_window, number_class, hidden_units, input_dim, learning_rate, epoch, is_multi_scale, training_level, cross_cv, wave_type, is_add_noise, noise_ratio, pooling_type,corss_val_label try: filepath = Parameters["filepath"] filename = Parameters["filename"] sequence_window = Parameters["sequence_window"] number_class = Parameters["number_class"] hidden_units = Parameters["hidden_units"] input_dim = Parameters["input_dim"] learning_rate = Parameters["learning_rate"] epoch = Parameters["epoch"] training_level = Parameters["training_level"] cross_cv = Parameters["cross_cv"] fixed_seed_num = Parameters["fixed_seed_num"] wave_type = Parameters["wave_type"] is_add_noise = Parameters["is_add_noise"] is_multi_scale = Parameters["is_multi_scale"] noise_ratio = Parameters["noise_ratio"] pooling_type = Parameters["pooling_type"] except: pass result_list_dict = defaultdict(list) evaluation_list = ["ACCURACY","F1_SCORE","AUC","G_MEAN"] for each in evaluation_list: result_list_dict[each] = [] for tab_cv in range(cross_cv): if not tab_cv == corss_val_label: continue print("******************************"+str(tab_cv)) #if corss_val_label == False: #if 'Nimda' in filename: #if not tab_cv == 1: continue #else: #if not tab_cv == 1 :continue#AS Leak, Code Red I, Slammer #else: #pass x_train, y_train,x_test, y_test = LoadData.GetData(pooling_type,is_add_noise,noise_ratio,'Attention',filepath, filename, sequence_window,tab_cv,cross_cv,Multi_Scale=is_multi_scale,Wave_Let_Scale=training_level,Wave_Type=wave_type) batch_size = min(len(y_train),len(y_test)) #batch_size = Parameters["batch_size"] #x_train = x_train_multi_list #x_test = x_testing_multi_list #batch_size = 10 if Label == "MS-LSTM": tf.reset_default_graph() tf.set_random_seed(fixed_seed_num) num_neurons = hidden_units # Network building if is_multi_scale == True and each_case == 2: #umber_scale_levels = training_level #u_w = tf.Variable(tf.random_normal(shape=[1,number_scale_levels]), name="u_w") #data_original_train = tf.placeholder(tf.float32,[number_scale_levels,batch_size,sequence_window,input_dim]) #output_data_original_train = tf.Print(data_original_train,[data_original_train],"The Original Train is :",first_n=4096,summarize=40) #data_original_train2 = tf.transpose(data_original_train,[1,2,3,0]) #data_original_train_merged = batch_vm2(data_original_train2,tf.transpose(u_w_scales_normalized)) #data_original_train_merged = tf.reshape(data_original_train_merged,(batch_size,sequence_window,input_dim)) #lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_neurons, forget_bias=1.0, activation=tf.nn.tanh) #val_list, state_list = [tf.nn.dynamic_rnn(lstm_cell, tf.gather(data_original_train2,i), dtype=tf.float32) for i in range(number_scale_levels)] #print(val_list) #val = tf.transpose(val,[1,0,2]) #val2_list = [tf.gather(tf.gather(val_list,i),val.get_shape()[0]-1) for i in range(number_scale_levels)] #val = tf.reshape(val,[batch_size*number_of_scales,num_neurons]) #out_put_val = tf.Print(val_list,[val_list],"The val shape is :",first_n=4096,summarize=40) #out_put_val2 = tf.Print(val2_list,[val2_list],"The val2 shape is :",first_n=4096,summarize=40) #Weight_W = tf.Variable(tf.truncated_normal([num_neurons,sequence_window])) #out_put_Weight_W = tf.Print(Weight_W,[Weight_W],"The Weight_W is :",first_n=1024,summarize=10) #b_W = tf.Variable(tf.constant(0.1, shape=[sequence_window,sequence_window])) #out_put_b_W = tf.Print(b_W,[b_W.get_shape()],"The b_W shape is 
                number_scale_levels = training_level
                # Normalized weights over the scale levels
                u_w_scales_normalized = tf.Variable(
                    tf.constant(1.0 / number_scale_levels, shape=[1, number_scale_levels]), name="u_w")
                u_w_scales_normalized = normalized_scale_levels(u_w_scales_normalized)
                # Attention context vector over the sequence window
                u_w = tf.Variable(tf.random_normal(shape=[1, sequence_window]), name="u_w")
                data_original_train = tf.placeholder(
                    tf.float32, [number_scale_levels, batch_size, sequence_window, input_dim])
                output_data_original_train = tf.Print(
                    data_original_train, [data_original_train],
                    "The Original Train is :", first_n=4096, summarize=40)
                # Merge the scale levels into a single (batch, window, dim) tensor
                data_original_train2 = tf.transpose(data_original_train, [1, 2, 3, 0])
                data_original_train_merged = batch_vm2(data_original_train2, tf.transpose(u_w_scales_normalized))
                data_original_train_merged = tf.reshape(
                    data_original_train_merged, (batch_size, sequence_window, input_dim))
                lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_neurons, forget_bias=1.0, activation=tf.nn.tanh)
                val, state = tf.nn.dynamic_rnn(lstm_cell, data_original_train_merged, dtype=tf.float32)
                val2 = tf.gather(val, val.get_shape()[0] - 1)
                out_put_val = tf.Print(val, [val.get_shape()], "The val shape is :", first_n=4096, summarize=40)
                out_put_val2 = tf.Print(val2, [val2.get_shape()], "The val2 shape is :", first_n=4096, summarize=40)
                Weight_W = tf.Variable(tf.truncated_normal([num_neurons, sequence_window]))
                out_put_Weight_W = tf.Print(Weight_W, [Weight_W], "The Weight_W is :", first_n=1024, summarize=10)
                b_W = tf.Variable(tf.constant(0.1, shape=[sequence_window, sequence_window]))
                out_put_b_W = tf.Print(b_W, [b_W.get_shape()], "The b_W shape is :", first_n=1024, summarize=10)
                # Time-step attention: score each step, then normalize the scores
                u_current_levels_temp = tf.matmul(val2, Weight_W) + b_W
                out_put_u_current_levels_b_W = tf.Print(b_W, [b_W], "The b_W shape is :", first_n=4096, summarize=40)
                out_put_u_current_levels_temp = tf.Print(
                    u_current_levels_temp, [u_current_levels_temp],
                    "The u_current_levels_temp is :", first_n=4096, summarize=40)
                out_put_u_current_u_w = tf.Print(u_w, [u_w], "The u_w shape is :", first_n=4096, summarize=40)
                u_current_levels_total = tf.gather(
                    tf.cumsum(tf.exp(batch_vm(u_current_levels_temp, tf.transpose(u_w)))), sequence_window - 1)
                out_put_u_current_levels_total = tf.Print(
                    u_current_levels_total, [u_current_levels_total],
                    "The u_current_levels_total shape is :", first_n=4096, summarize=40)
                out_put_u_w_scale = tf.Print(
                    u_w_scales_normalized, [u_w_scales_normalized],
                    "The u_w_scales shape is ----------------:", first_n=4096, summarize=40)
                u_current_levels = tf.div(
                    tf.exp(batch_vm(u_current_levels_temp, tf.transpose(u_w))), u_current_levels_total)
                out_put_u_current_levels = tf.Print(
                    u_current_levels, [u_current_levels],
                    "The u_current_levels shape is :", first_n=4096, summarize=40)
                target = tf.placeholder(tf.float32, [batch_size, number_class])
                # Attention-weighted combination of the LSTM outputs
                m_total = batch_vm(tf.transpose(u_current_levels), val)
                out_put_m_total_shape = tf.Print(m_total, [m_total.get_shape()], "The m_total shape is :", first_n=4096, summarize=40)
                out_put_m_total = tf.Print(m_total, [m_total], "The m_total is :", first_n=4096, summarize=40)
                weight = tf.Variable(tf.truncated_normal([num_neurons, int(target.get_shape()[1])]))
                bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
                prediction = tf.nn.softmax(tf.matmul(m_total, weight) + bias)
                out_put_prediction = tf.Print(prediction, [prediction.get_shape()], "The prediction shape is :", first_n=1024, summarize=10)
            else:
                try:
                    number_scale_levels = training_level
                    u_w_scales_normalized = tf.Variable(
                        tf.constant(1.0 / number_scale_levels, shape=[1, number_scale_levels]), name="u_w")
                    u_w_scales_normalized = normalized_scale_levels(u_w_scales_normalized)
                    u_w = tf.Variable(tf.random_normal(shape=[1, sequence_window]), name="u_w")
                    data_original_train = tf.placeholder(
                        tf.float32, [number_scale_levels, batch_size, sequence_window, input_dim])
                    output_data_original_train = tf.Print(
                        data_original_train, [data_original_train],
                        "The Original Train is :", first_n=4096, summarize=40)
                    data_original_train2 = tf.transpose(data_original_train, [1, 2, 3, 0])
                    data_original_train_merged = batch_vm2(data_original_train2, tf.transpose(u_w_scales_normalized))
                    data_original_train_merged = tf.reshape(
                        data_original_train_merged, (batch_size, sequence_window, input_dim))
                    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_neurons, forget_bias=1.0, activation=tf.nn.tanh)
                    val, state = tf.nn.dynamic_rnn(lstm_cell, data_original_train_merged, dtype=tf.float32)
                    target = tf.placeholder(tf.float32, [batch_size, number_class])
                except:
                    # Plain single-scale LSTM fallback
                    data_original_train = tf.placeholder(tf.float32, [None, sequence_window, input_dim])
                    target = tf.placeholder(tf.float32, [None, number_class])
                    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_neurons, forget_bias=1.0, activation=tf.nn.tanh)
                    val, state = tf.nn.dynamic_rnn(lstm_cell, data_original_train, dtype=tf.float32)
                val = tf.transpose(val, [1, 0, 2])
                last = tf.gather(val, int(val.get_shape()[0]) - 1)
                weight = tf.Variable(tf.truncated_normal([num_neurons, int(target.get_shape()[1])]))
                bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
                prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)

            cost_cross_entropy = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(prediction, target, name=None))  # Sigmoid
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
            minimize = optimizer.minimize(cost_cross_entropy)
            correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(target, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

            init_op = tf.initialize_all_variables()
            sess = tf.Session()
            sess.run(init_op)
            no_of_batches = int(len(y_train) / batch_size)
            epoch_training_loss_list = []
            epoch_training_acc_list = []
            epoch_val_loss_list = []
            epoch_val_acc_list = []
            weight_list = []
            early_stopping = 100
            epoch_stop = epoch
            for i in range(epoch):
                if early_stopping > 0:
                    pass
                else:
                    epoch_stop = i + 1
                    break
                ptr = 0
                for j in range(no_of_batches):
                    inp, out = x_train[:, ptr:ptr + batch_size], y_train[ptr:ptr + batch_size]
                    inp2, out2 = x_test[:, ptr:ptr + batch_size], y_test[ptr:ptr + batch_size]
                    try:
                        # Debug prints and attention weights; these ops only exist in the multi-scale case
                        sess.run(out_put_u_w_scale, {data_original_train: inp, target: out})
                        sess.run(out_put_val, {data_original_train: inp, target: out})
                        sess.run(out_put_val2, {data_original_train: inp, target: out})
                        weight_list.append(sess.run(u_current_levels, {data_original_train: inp, target: out}))
                    except:
                        pass
                    ptr += batch_size
                    print(inp.shape)
                    sess.run(minimize, {data_original_train: inp, target: out})
                    training_acc, training_loss = sess.run(
                        (accuracy, cost_cross_entropy), {data_original_train: inp, target: out})
                    epoch_training_loss_list.append(training_loss)
                    epoch_training_acc_list.append(training_acc)
                    val_acc, val_loss = sess.run(
                        (accuracy, cost_cross_entropy), {data_original_train: inp2, target: out2})
                    epoch_val_loss_list.append(val_loss)
                    epoch_val_acc_list.append(val_acc)
                    print("Epoch %s" % (str(i + 1)) + ">" * 20 + "="
                          + "train_accuracy: %s, train_loss: %s" % (str(training_acc), str(training_loss))
                          + ",\tval_accuracy: %s, val_loss: %s" % (str(val_acc), str(val_loss)))
                    # Simple early stopping on validation accuracy
                    try:
                        max_val_acc = epoch_val_acc_list[-2]
                    except:
                        max_val_acc = 0
                    if epoch_val_acc_list[-1] < max_val_acc:
                        early_stopping -= 1
                    elif epoch_val_acc_list[-1] >= max_val_acc:
                        early_stopping = 100

            try:
                result = sess.run(prediction, {data_original_train: x_test, target: y_test})
            except:
                # The multi-scale placeholder has a fixed batch dimension, so truncate the test set
                x_test = x_test[0:batch_size]
                y_test = y_test[0:batch_size]
                result = sess.run(prediction, {data_original_train: x_test, target: y_test})
            sess.close()
        elif Label == "MS-LSTMB":
            pass

        results = Evaluation.Evaluation(y_test, result)  # Computing ACCURACY, F1-Score, AUC, etc.
        try:
            for each_eval, each_result in results.items():
                result_list_dict[each_eval].append(each_result)
            if len(Parameters) > 0:
                label = "PW"
            else:
                label = "DA"
        except:
            label = "AUC"
        if label == "AUC":
            return results
        if label == "DA":
            pass
        """
        y_test2 = np.array(Evaluation.ReverseEncoder(y_test))
        result2 = np.array(Evaluation.ReverseEncoder(result))
        with open("StatFalseAlarm_" + filename + "_True.txt", "w") as fout:
            for tab in range(len(y_test2)):
                fout.write(str(int(y_test2[tab])) + '\n')
        with open("StatFalseAlarm_" + filename + "_" + Label + "_" + "_Predict.txt", "w") as fout:
            for tab in range(len(result2)):
                fout.write(str(int(result2[tab])) + '\n')
        """
        try:
            for eachk, eachv in result_list_dict.items():
                result_list_dict[eachk] = np.average(eachv)
            print(result_list_dict)
            if is_add_noise == False:
                if corss_val_label == 0:
                    outputfilename = "Tab_A_MS-LSTM_Log_" + filename + ".txt"
                else:
                    outputfilename = "Tab_B_MS-LSTM_Log_" + filename + ".txt"
                with open(os.path.join(os.getcwd(), outputfilename), "a") as fout:
                    if training_level > 0:
                        outfileline = (Label + "_epoch:" + str(epoch_stop)
                                       + ",__wavelet type:" + str(wave_type)
                                       + ",__pooling type:" + str(pooling_type)
                                       + ",__learning rate:" + str(learning_rate)
                                       + ",__multi_scale:" + str(is_multi_scale)
                                       + ",__scale_levels:" + str(training_level)
                                       + ",__sequence_window:" + str(sequence_window) + "\n")
                    else:
                        outfileline = (Label + "_epoch:" + str(epoch_stop)
                                       + ",__wavelet type:" + str(wave_type)
                                       + ",__learning rate:" + str(learning_rate)
                                       + ",__multi_scale:" + str(is_multi_scale)
                                       + ",__scale_levels:" + str(training_level)
                                       + ",__sequence_window:" + str(sequence_window) + "\n")
                    fout.write(outfileline)
                    for eachk, eachv in result_list_dict.items():
                        fout.write(eachk + ": " + str(round(eachv, 3)) + ",\t")
                    fout.write('\n')
            else:
                with open(os.path.join(os.getcwd(), "MS-LSTM_Log_Adding_Noise_" + filename + ".txt"), "a") as fout:
                    if training_level > 0:
                        outfileline = (Label + "_____epoch:" + str(epoch_stop)
                                       + ",_____pooling type:" + str(pooling_type)
                                       + ",_____learning rate:" + str(learning_rate)
                                       + ",_____multi_scale:" + str(is_multi_scale) + "\n")
                    else:
                        outfileline = (Label + "_____epoch:" + str(epoch_stop)
                                       + ",_____pooling type:" + str(pooling_type)
                                       + ",_____learning rate:" + str(learning_rate)
                                       + ",_____multi_scale:" + str(is_multi_scale)
                                       + ",_____train_set_using_level:" + str(training_level) + "\n")
                    fout.write(outfileline)
                    for eachk, eachv in result_list_dict.items():
                        fout.write(eachk + ": " + str(round(eachv, 3)) + ",\t")
                    fout.write('\n')
        except:
            pass

        if not "DA" == label:
            return results
        return (epoch_training_loss_list, epoch_val_loss_list,
                epoch_training_acc_list, epoch_val_acc_list, weight_list, results)
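# A hypothetical invocation of Model() above, for illustration only. The parameter names
# mirror the keys read from the Parameters dict inside the function; the concrete values
# (file names, window sizes, learning rate, etc.) are placeholders, not values taken from
# the source.
if __name__ == "__main__":
    params = {
        "filepath": "./data",          # assumed data directory
        "filename": "traffic.csv",     # placeholder input file
        "sequence_window": 10,
        "number_class": 2,
        "hidden_units": 64,
        "input_dim": 33,
        "learning_rate": 0.01,
        "epoch": 50,
        "training_level": 3,
        "cross_cv": 2,
        "fixed_seed_num": 1234,
        "wave_type": "db1",
        "is_add_noise": False,
        "is_multi_scale": True,
        "noise_ratio": 0.0,
        "pooling_type": "mean",
    }
    Model(each_case=2, Label="MS-LSTM", Parameters=params)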
for img in Test:
    A, B, C = TD.get_indeces(img)
    B1 = B.reshape(np.prod(B.shape))
    batch = 1000
    num_batches = A.shape[0] / batch
    Sha = B.shape
    preds = np.zeros(shape=(A.shape[0], NC))
    # Predict full batches, then the remainder
    for i in range(num_batches):
        idx = range(i * batch, (i + 1) * batch)
        K = A[idx]
        M, N, O = TD.Patch_gen(K, PS, C)
        preds[idx] = f_eval(M, N, O)
    if num_batches * batch < A.shape[0]:
        tot = num_batches * batch
        K = A[tot:]
        M, N, O = TD.Patch_gen(K, PS, C)
        preds[tot:A.shape[0]] = f_eval(M, N, O)
    P = np.argmax(preds, axis=-1)
    # Scatter the per-voxel predictions back into the volume shape
    MM = np.ravel_multi_index(A.T, np.asarray(B.shape))
    Final_pred = np.zeros(B1.shape)
    Final_pred[MM] = P
    Lab = B1.reshape(Sha)
    Segs = Final_pred.reshape(Sha)
    Dice = np.append(Dice, [E.Dice_score(Segs, Lab, 1)])
    print Dice
    io.savemat("/home/xvt131/Biomediq/Results/valiBrain/%s" % (img[45:60]), mdict={"Seg": Segs, "Lab": Lab})
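# The loop above relies on E.Dice_score(segmentation, labels, class_id), which is not shown
# in this section. Below is a minimal sketch assuming the standard Dice coefficient
# 2*|A intersect B| / (|A| + |B|) for a single class; the function name and argument order
# follow the call above, but this is an illustrative stand-in, not the project's implementation.
import numpy as np

def Dice_score(seg, lab, class_id):
    seg_mask = (seg == class_id)
    lab_mask = (lab == class_id)
    denom = seg_mask.sum() + lab_mask.sum()
    if denom == 0:
        return 1.0  # both masks empty: treat as perfect overlap
    return 2.0 * np.logical_and(seg_mask, lab_mask).sum() / float(denom)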
def test_confusion_matrix(y_pred):
    y_true = [0, 1]
    expect = confusion_matrix(y_true, y_pred).flatten().tolist()
    assert ev.confusion_matrix(y_true, y_pred) == expect
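# For reference, a minimal function consistent with the assertion above: it returns the
# confusion matrix flattened row by row, matching sklearn's confusion_matrix(...).flatten().tolist().
# This is an assumed sketch of the behavior under test, not the actual ev module, and the
# name confusion_matrix_flat is hypothetical.
def confusion_matrix_flat(y_true, y_pred):
    labels = sorted(set(y_true) | set(y_pred))
    index = {label: i for i, label in enumerate(labels)}
    n = len(labels)
    counts = [[0] * n for _ in range(n)]
    for t, p in zip(y_true, y_pred):
        counts[index[t]][index[p]] += 1
    # Row-major flatten, matching numpy's default flatten order
    return [c for row in counts for c in row]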