Example No. 1
def trainWithDF(sparkContext, rawDataFrame):
	simMat = cm.getSimilarityMatrixByRDD(sparkContext, rawDataFrame)
	fp.recordSimMatrix(simMat, pv.simMatrixFile)
	model, unifiedRDDVecs = cluster.getClusterModel(sparkContext, simMat, rawDataFrame, (pv.truncateLineCount/pv.IDFOREACHCLUSTER), pv.dimensionReductionNum, pv.eigenVecFile)
	eva.evaluateModel(model, unifiedRDDVecs)
	fp.outputNodesInSameCluster(model, unifiedRDDVecs, rawDataFrame, pv.clusterIDCenterFile, pv.clusterIDFile)
	decisionTreeModel = classification.process(sparkContext, (pv.truncateLineCount/pv.IDFOREACHCLUSTER), pv.treeMaxDepth, pv.treeMaxBins, pv.eigenVecFile, pv.clusterIDFile)
	model.save(sparkContext, pv.clusterModelPath)
	Utils.logMessage("\nTrain cluster model finished")
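A minimal invocation sketch for the routine above, assuming a live SparkContext and a pandas DataFrame holding the merged account data; the app name and input file name are hypothetical:

from pyspark import SparkContext
import pandas as pd

sc = SparkContext(appName="clusterTraining")                            # hypothetical app name
rawDataFrame = pd.read_csv("accounts.csv", sep=',', encoding='utf-8')   # hypothetical input file
trainWithDF(sc, rawDataFrame)
sc.stop()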
Example No. 2
    def evaluate(self, config=None):
        if config is None:
            config = self._config["evaluation"]
        evaluation = Evaluation(self.iterators[config['iterator']],
                                self.model,
                                self.candidator,
                                self.stats[config['stats']],
                                sampling=config.get('sampling'),
                                log_path=self.path + "/evaluation.txt",
                                db=self.db,
                                trained_mentions=self.trained_mentions)
        evaluation.evaluate()
Example No. 3
def train(sparkContext):
	fp.truncate(pv.mergedAccountFile, pv.truncatedFile, pv.truncateLineCount)
	fp.preprocess(pv.truncatedFile, pv.processedFile, pv.targetFields)
	pd.read_csv(pv.trainingFile, sep=',', encoding='utf-8').to_csv(pv.fileForClusterModel, index=False, encoding='utf-8')
	rawDataFrame = pd.read_csv(pv.fileForClusterModel, sep=',', encoding='utf-8')
	simMat = cm.getSimilarityMatrix(sparkContext, rawDataFrame)
	fp.recordSimMatrix(simMat, pv.simMatrixFile)
	model, unifiedRDDVecs = cluster.getClusterModel(sparkContext, simMat, rawDataFrame, (pv.truncateLineCount/pv.IDFOREACHCLUSTER), pv.dimensionReductionNum, pv.eigenVecFile)
	eva.evaluateModel(model, unifiedRDDVecs)
	fp.outputNodesInSameCluster(model, unifiedRDDVecs, rawDataFrame, pv.clusterIDCenterFile, pv.clusterIDFile)
	decisionTreeModel = classification.process(sparkContext, (pv.truncateLineCount/pv.IDFOREACHCLUSTER), pv.treeMaxDepth, pv.treeMaxBins, pv.eigenVecFile, pv.clusterIDFile)
	model.save(sparkContext, pv.clusterModelPath)
	Utils.logMessage("\nTrain cluster model finished")
def DecisionTreeProcess(trainingSet, testSet, imp, dtMaxDepth, dtMaxBins):
	decisionTreeModel = DecisionTree.trainClassifier(trainingSet, numClasses=4, categoricalFeaturesInfo={},
	                                                 impurity=imp, maxDepth=dtMaxDepth, maxBins=dtMaxBins)


	predictions = decisionTreeModel.predict(trainingSet.map(lambda item: item.features))
	trainingLabelsAndPredictions = trainingSet.map(lambda item: item.label).zip(predictions)
	eva.calculateErrorRate("\nClassification model Training set", trainingLabelsAndPredictions)

	predictions = decisionTreeModel.predict(testSet.map(lambda item: item.features))
	testLabelsAndPredictions = testSet.map(lambda item: item.label).zip(predictions)
	eva.calculateErrorRate("\nClassification model Test set", testLabelsAndPredictions)

	return decisionTreeModel
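A usage sketch for DecisionTreeProcess, assuming a live SparkContext `sc` and data already encoded as MLlib LabeledPoint objects; the toy features and the 70/30 split below are illustrative only:

from pyspark.mllib.regression import LabeledPoint

# Toy two-class data; labels must lie in [0, numClasses)
points = sc.parallelize([LabeledPoint(0.0, [1.0, 0.0]),
                         LabeledPoint(1.0, [0.0, 1.0])] * 100)
trainingSet, testSet = points.randomSplit([0.7, 0.3])
treeModel = DecisionTreeProcess(trainingSet, testSet, "gini",
                                dtMaxDepth=5, dtMaxBins=32)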
Example No. 5
    def __init__(self, wire, ber, speed, id, stationType = "client"):
        super(Station, self).__init__("node" + str(id))

        self.contextProviders.append(
            openwns.probebus.ConstantContextProvider(
                "DLL.StationType", Evaluation.toStaTypeId(stationType)))

        # Physical Layer (PHY)
        self.phy = copper.Copper.Transceiver(self,
                                             # Name of the PHY
                                             "phy",

                                             # Medium to which the instance is attached to
                                             wire,

                                             # BER this instance experiences
                                             ber,

                                             # Transmit data rate
                                             speed)

        # Data Link Layer (DLL)
        self.dll = Tutorial.Experiment4(self, "ShortCut", 
                                        self.phy.dataTransmission, self.phy.notification, 
                                        stationType = stationType)

        # Network Layer (NL)
        domainName = "node" + str(id) + ".glue.wns.org"
        self.nl = ip.Component.IPv4Component(self,
                                             # Name of the NL
                                             domainName + ".ip",

                                             # Domain name
                                             domainName)

        # Connect NL instance to the DLL interface
        self.nl.addDLL(
            # Name of the DLL interface (Only used within the NL module. Hence, it may differ from
            # the name given to the DLL instance during instantiation)
            _name = "glue",

            # Where to get my IP Address
            _addressResolver = ip.AddressResolver.VirtualDHCPResolver("theOnlySubnet"),

            # ARP zone
            _arpZone = "theOnlySubnet",

            # We can deliver locally
            _pointToPoint = False,

            # DLL SAP for outgoing unicast transmissions
            _dllDataTransmission = self.dll.unicastDataTransmission,

            # DLL SAP for incoming unicast transmissions
            _dllNotification = self.dll.unicastNotification)


        # Traffic generator
        self.load = constanze.node.ConstanzeComponent(self, "constanze", parentLogger = self.logger)
def EvalKLD(summText, pModel):
    KLScore = []
    # Build a language model from the summarization text
    summModel = getModel(summText, len(pModel.values())).prob
    summModelCopy = summModel.copy()
    pModelCopy = pModel.copy()
    # Score the summary model against the full model with KL divergence
    KLScore.append(Ev.getKLScore(summModelCopy, pModelCopy))

    return KLScore
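For reference, a minimal sketch of the discrete KL divergence that Ev.getKLScore presumably computes here, assuming both models are dicts mapping term to probability; the smoothing constant is an assumption:

import math

def kl_divergence(pModel, qModel, eps=1e-12):
    # D_KL(P || Q) = sum over terms of P(w) * log(P(w) / Q(w)), lightly smoothed
    return sum(p * math.log((p + eps) / (qModel.get(w, 0.0) + eps))
               for w, p in pModel.items() if p > 0)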
def process(sc, dtClusterNum, dtMaxDepth, dtMaxBins, eigenVecFile, markedClusterFile):
	filteredEigenVec = sc.textFile(eigenVecFile).map(lambda item: removeVirtualPart(item)).collect()
	clusterIDs = sc.textFile(markedClusterFile).map(lambda item: extractClusterID(item)).collect()
	clusterIdEigenVecMapRDD = sc.parallelize(clusterIDs).zip(sc.parallelize(filteredEigenVec))
	labeledClusterIdEigenVecMapRdd = clusterIdEigenVecMapRDD.map(lambda item: LabeledPoint(item[0], item[1]))

	trainingSet, testSet = labeledClusterIdEigenVecMapRdd.randomSplit([0.7, 0.3])

	decisionTreeModel = DecisionTree.trainClassifier(trainingSet, numClasses=dtClusterNum, categoricalFeaturesInfo={},
	                                                 impurity='entropy', maxDepth=dtMaxDepth, maxBins=dtMaxBins)

	predictions = decisionTreeModel.predict(trainingSet.map(lambda item: item.features))
	trainingLabelsAndPredictions = trainingSet.map(lambda item: item.label).zip(predictions)
	eva.calculateErrorRate("\nCluster model Training set", trainingLabelsAndPredictions)

	predictions = decisionTreeModel.predict(testSet.map(lambda item: item.features))
	testLabelsAndPredictions = testSet.map(lambda item: item.label).zip(predictions)
	eva.calculateErrorRate("\nCluster model Test set", testLabelsAndPredictions)

	return decisionTreeModel
Example No. 8
	def boucle_detect_eval(self, range_order, range_seuil):
		if self.distMoy == []:
			self.calcul_dist()

		resultat = [[0.0 for u in range(range_order)] for v in range(range_seuil)]
		for i in range(range_order):
			for j in range(range_seuil):
				self.order = 4 + i
				self.alpha = 1 + 0.1 * j  # threshold as a function of j; to be tuned later
				self.detect()
				resultat[j][i] = Evaluation.evalu()
		return resultat
Example No. 9
def trainOptimalModel(trainingData, testData):
	print "\nTraining optimal Random Forest model started!"
	Utils.logTime()

	numTreesVals = [3,5,8]
	featureSubsetStrategyVals = ['auto','all','sqrt','log2','onethird']
	impurityVals = ['gini', 'entropy']
	maxDepthVals = [3,4,5,6,7]
	maxBinsVals = [8,16,32]

	optimalModel = None
	optimalNumTrees = None
	optimalFeatureSubsetStrategy = None
	optimalMaxDepth = None
	optimalImpurity = None
	optimalBinsVal = None
	minError = None

	try:
		for curNumTree in numTreesVals:
			for curFeatureSubsetStrategy in featureSubsetStrategyVals:
				for curImpurity in impurityVals:
					for curMaxDepth in maxDepthVals:
						for curMaxBins in maxBinsVals:
							model = RandomForest.trainClassifier(trainingData, 
																numClasses=2, 
																categoricalFeaturesInfo={}, 
														 		numTrees=curNumTree,
														 		featureSubsetStrategy=curFeatureSubsetStrategy,
														 		impurity=curImpurity, 
														 		maxDepth=curMaxDepth,
														 		maxBins=curMaxBins)
							testErr = Evaluation.evaluate(model, testData)
							if minError is None or testErr < minError:
								minError = testErr
								optimalNumTrees = curNumTree
								optimalFeatureSubsetStrategy = curFeatureSubsetStrategy
								optimalImpurity = curImpurity
								optimalMaxDepth = curMaxDepth
								optimalBinsVal = curMaxBins
								optimalModel = model
	except:
		msg = "\nException during model training with below parameters:"
		msg += "\tnum trees: " + str(curNumTree)
		msg += "\tfeature subset strategy: " + str(curFeatureSubsetStrategy)
		msg += "\timpurity: " + str(curImpurity)
		msg += "\tmaxDepth: " + str(curMaxDepth)
		msg += "\tmaxBins: " + str(curMaxBins)
		Utils.logMessage(msg)

	logMessage(optimalModel, optimalNumTrees, optimalFeatureSubsetStrategy, optimalMaxDepth, optimalImpurity, optimalBinsVal, minError)
	return optimalModel 
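The five nested loops above amount to a grid search with best-model bookkeeping; a sketch of the same pattern flattened with itertools.product, where `train` and `score` stand in for the RandomForest.trainClassifier and Evaluation.evaluate calls used in the original:

from itertools import product

def gridSearch(train, score, grid):
    # grid: dict of parameter name -> list of candidate values
    bestErr, bestModel, bestParams = None, None, None
    for values in product(*grid.values()):
        params = dict(zip(grid.keys(), values))
        model = train(**params)
        err = score(model)
        if bestErr is None or err < bestErr:
            bestErr, bestModel, bestParams = err, model, params
    return bestModel, bestParams, bestErr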
Example No. 10
def run(searchForOptimal, basepath, filepath):
	sc = buildContext()

	trainingData, testData = loadData(sc, basepath, filepath)

	if searchForOptimal:
		optimalRandomForestModel = RandomForest.trainOptimalModel(trainingData, testData)
		Evaluation.evaluate(optimalRandomForestModel, testData, logMessage=True)

		optimalDecisionTreeModel = DecisionTree.trainOptimalModel(trainingData, testData)
		Evaluation.evaluate(optimalDecisionTreeModel, testData, logMessage=True)
	else:
		randomForestModel = RandomForest.trainModel(trainingData)
		Evaluation.evaluate(randomForestModel, testData, logMessage=True)

		decisionTreeModel = DecisionTree.trainModel(trainingData)
		Evaluation.evaluate(decisionTreeModel, testData, logMessage=True)
def EvalKLDRandomSamp(textList, Niters, pModelCopy, Nsumm):

    KLScore = []
    for iters in range(Niters):
        index = np.random.uniform(0, len(textList), Nsumm)
        indexALLRS = np.array([int(i) for i in index])
        summText = [textList[int(i)] for i in index]
        summModel = getModel(summText, len(pModelCopy.values())).prob
        summModelCopy = summModel.copy()
        # Use fresh copies each iteration so getKLScore cannot mutate the inputs
        pModelIter = pModelCopy.copy()
        KLScore.append(Ev.getKLScore(summModelCopy, pModelIter))

        #KLScore.append(EvalKLD(summText, pModelCopy))

    return (KLScore, summModel, summText, indexALLRS)
Example No. 12
def trainOptimalModel(trainingData, testData):
	print "\nTraining optimal Decision Tree model started!"
	Utils.logTime()

	impurityVals = ['gini', 'entropy']
	maxDepthVals = [3,4,5,6,7]
	maxBinsVals = [8,16,32]

	optimalModel = None
	optimalMaxDepth = None
	optimalImpurity = None
	optimalBinsVal = None
	minError = None

	try:
		for curImpurity in impurityVals:
			for curMaxDepth in maxDepthVals:
				for curMaxBins in maxBinsVals:
					model = DecisionTree.trainClassifier(trainingData, 
														 numClasses=2, 
														 categoricalFeaturesInfo={}, 
														 impurity=curImpurity, 
														 maxDepth=curMaxDepth,
														 maxBins=curMaxBins)
					testErr, PR, ROC = Evaluation.evaluate(model, testData)
					if minError is None or testErr < minError:
						minError = testErr
						optimalImpurity = curImpurity
						optimalMaxDepth = curMaxDepth
						optimalBinsVal = curMaxBins
						optimalModel = model
	except:
		msg = "\nException during model training with below parameters:"
		msg += "\timpurity: " + str(curImpurity)
		msg += "\tmaxDepth: " + str(curMaxDepth)
		msg += "\tmaxBins: " + str(curMaxBins)
		Utils.logMessage(msg)

	logMessage(optimalModel, optimalMaxDepth, optimalImpurity, optimalBinsVal, minError)
	return optimalModel 
Example No. 13
def splitBySignals(dataStep):
    segments = []
    for input, target in dataStep:
        targetInt = np.argmax(Evaluation.addNoGestureSignal(target), 1)
        inds = np.where(targetInt[:-1] != targetInt[1:])[0]
        lastInd = -1
        for ind in inds:
            if targetInt[ind] != np.max(targetInt):
                iSegment = input[lastInd+1:ind+1]
                tSegment = target[lastInd+1:ind+1]
                tSegment[0, :] = 0
                tSegment[-1, :] = 0
                segments.append((iSegment, tSegment))
                lastInd = ind
        ind = len(targetInt) - 1
        iSegment = input[lastInd+1:ind+1]
        tSegment = target[lastInd+1:ind+1]
        tSegment[0, :] = 0
        tSegment[-1, :] = 0
        segments.append((iSegment, tSegment))
    return segments
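The segment boundaries above come from comparing each label with its successor; a small self-contained illustration of that np.where trick:

import numpy as np

targetInt = np.array([0, 0, 2, 2, 2, 0, 0])
changePoints = np.where(targetInt[:-1] != targetInt[1:])[0]
# changePoints == array([1, 4]): the last index before each label switch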
Example No. 14
from Components import *

""" Load data into a numpy matrix.
    (1) load function is used to load a .npy binary file,
        which can be generated from a .txt file
    (2) genfromtxt generates a numpy array from a raw text
        file
"""
print "Loading", sys.argv[1], "similarity matrix..."
cost_mat = load(sys.argv[1])
#cost_mat = genfromtxt(data_path + '/cost-matrices/' + sys.argv[1])

""" Compute the bullseye score.
    Assuming MPEG-7 data is loaded
"""
e = Evaluation(cost_mat, 20, 70)
print "Top 40 bullseye score: ", e.bullseye(40)

""" Compute a new similarity matrix using dice coefficient
    as a population cue
"""
#Geometric mean, to ensure symmetry
cost_mat = sqrt(cost_mat * cost_mat.transpose())
p = Population(cost_mat, 20, 70, verbose=True)
#Not setting k will attempt to automatically find it!
processed_matrix = p.generate_diff(k=13) 

e = Evaluation(processed_matrix, 20, 70)
print "Top 40 bullseye score using dice: ", e.bullseye(40)

""" Update the similarity matrix further using the previous
li_results = []
# 5) As in the training set, loop through the test set to get the individual words
for file_name in testset:
    minimum_neg_log_prob = 1000000000
    min_category = ''
    li = get_list_tokens_nltk(corpus, file_name)
    set_list_words = set([w for w in li if w in word_list])

    ##6) Get the probability for each category,
    #using the cat_num_docs dictionary to wade through the categories
    for cat in cat_num_docs:
        neg_log_prob = -log(cat_num_docs[cat] / float(len(trainset)))
        for w in word_cat_num_doc_dict:
            if w in set_list_words:
                neg_log_prob -= log(word_cat_num_doc_dict[w][cat])
            else:
                neg_log_prob -= log(1 - word_cat_num_doc_dict[w][cat])

        if minimum_neg_log_prob > neg_log_prob:
            min_category = cat
            minimum_neg_log_prob = neg_log_prob

    li_results.append((file_name, min_category, f2c(corpus, file_name)))
if binary_classification:
    Evaluation.evaluation_binary(li_results)
else:
    Evaluation.evaluation_multi_class(li_results, cat_num_docs.keys())

print "The time taken by the trained classifier to assign labels"
print time.time() - start_time, "seconds"
    #Build the Probability density Function
    print "Building the Model"
    pModel={}
    #pModel=getModel(textList, Nterms).prob
    #pModel=getModel_UD(textList)
    pModel = est_probability_UD(textList)
    f = open('Model1.txt', 'w')
    dump_model(pModel, f)
    f.close()

    # get the length Model
    wordList = pModel.keys()
    lenPdf = getlengthpdf(textList, wordList)

    # Get the sentiment joint probability
    textSentiDict, sentiDict = Ev.getSentimentScoreAllText(textList)

    #[sentimentProb, textSentiDict, sentiDict]=getSentimentProb(pModel.keys(), textList)
    sentimentProb = getSentimentProb_FromEV(pModel.keys(), textList)


    # Get the TF matrix for the input tweets
    cleanWords = cleanUpWords(pModel.keys())
    TFMat = []
    for i in range(len(textList)):
        TFMat.append(np.array([0 for col in range(len(pModel.values()))]))

    TFvec = np.array([0 for col in range(len(pModel.values()))])
    for i in range(len(textList)):
        textWords=createStemmedWordList(textList[i])
        j=0
Example No. 17
def sampling(settings, types_dict, types_dict_c, out, ncounterfactuals, clf,
             n_batches_train, n_samples_train, k, n_input, degree_active):

    argvals = settings.split()
    args = Helpers.getArgs(argvals)

    # Creating graph
    sess_HVAE = tf.Graph()

    with sess_HVAE.as_default():
        # args.model_name: excluded
        tf_nodes = Graph.C_CHVAE_graph(
            args.types_file,
            args.types_file_c,
            learning_rate=1e-3,
            z_dim=args.dim_latent_z,
            y_dim=args.dim_latent_y,
            s_dim=args.dim_latent_s,
            y_dim_partition=args.dim_latent_y_partition,
            nsamples=1000,
            p=2)

    # start session
    with tf.Session(graph=sess_HVAE) as session:
        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()
        print('Initializing Variables ...')
        tf.global_variables_initializer().run()

        # -----------------------------------------------------------------------------------#
        # Apply on training data

        print('Training the CHVAE ...')
        if (args.train == 1):

            start_time = time.time()
            # Training cycle

            loglik_epoch = []
            KL_s_epoch = []
            KL_z_epoch = []
            for epoch in tqdm(range(args.epochs)):
                avg_loss = 0.
                avg_KL_s = 0.
                avg_KL_z = 0.
                samples_list = []
                p_params_list = []
                q_params_list = []
                log_p_x_total = []

                # Annealing of Gumbel-Softmax parameter
                tau = np.max([1.0 - 0.001 * epoch, 1e-3])

                # Randomize the data in the mini-batches
                train_data = out['training'][1]
                train_data_c = out['training'][2]
                random_perm = np.random.permutation(
                    range(np.shape(train_data)[0]))
                train_data_aux = train_data[random_perm, :]
                train_data_aux_c = train_data_c[random_perm, :]

                for i in range(n_batches_train):
                    # Create inputs for the feed_dict
                    data_list = Helpers.next_batch(train_data_aux,
                                                   types_dict,
                                                   args.batch_size,
                                                   index_batch=i)  # DONE
                    data_list_c = Helpers.next_batch(train_data_aux_c,
                                                     types_dict_c,
                                                     args.batch_size,
                                                     index_batch=i)  # DONE

                    # Create feed dictionary
                    feedDict = {
                        i: d
                        for i, d in zip(tf_nodes['ground_batch'], data_list)
                    }
                    feedDict.update({
                        i: d
                        for i, d in zip(tf_nodes['ground_batch_c'],
                                        data_list_c)
                    })
                    feedDict[tf_nodes['tau_GS']] = tau
                    feedDict[tf_nodes['batch_size']] = args.batch_size

                    # Running VAE
                    _, X_list, loss, KL_z, KL_s, samples, log_p_x, p_params, q_params = session.run(
                        [
                            tf_nodes['optim'], tf_nodes['X'],
                            tf_nodes['loss_re'], tf_nodes['KL_z'],
                            tf_nodes['KL_s'], tf_nodes['samples'],
                            tf_nodes['log_p_x'], tf_nodes['p_params'],
                            tf_nodes['q_params']
                        ],
                        feed_dict=feedDict)

                    # Collect all samples, distribution parameters and logliks in lists
                    if i == 0:
                        samples_list = [samples]
                        p_params_list = [p_params]
                        q_params_list = [q_params]
                        log_p_x_total = [log_p_x]
                    else:
                        samples_list.append(samples)
                        p_params_list.append(p_params)
                        q_params_list.append(q_params)
                        log_p_x_total.append(log_p_x)

                    # Compute average loss
                    avg_loss += np.mean(loss)
                    avg_KL_s += np.mean(KL_s)
                    avg_KL_z += np.mean(KL_z)

                # Concatenate samples in arrays
                s_total, z_total, y_total, est_data = Helpers.samples_concatenation(
                    samples_list)

                # Transform discrete variables back to the original values
                train_data_transformed = Helpers.discrete_variables_transformation(
                    train_data_aux[:n_batches_train * args.batch_size, :],
                    types_dict)
                est_data_transformed = Helpers.discrete_variables_transformation(
                    est_data, types_dict)

                # Create global dictionary of the distribution parameters
                p_params_complete = Helpers.p_distribution_params_concatenation(
                    p_params_list,  # DONE
                    types_dict,
                    args.dim_latent_z,
                    args.dim_latent_s)

                q_params_complete = Helpers.q_distribution_params_concatenation(
                    q_params_list,  # DONE
                    args.dim_latent_z,
                    args.dim_latent_s)

                # Compute mean and mode of our loglik models: these correspond to the estimated values
                loglik_mean, loglik_mode = Helpers.statistics(
                    p_params_complete['x'], types_dict)  # DONE

                # Try this for the errors
                error_train_mean = Helpers.error_computation(
                    train_data_transformed, loglik_mean, types_dict)
                error_train_mode = Helpers.error_computation(
                    train_data_transformed, loglik_mode, types_dict)
                error_train_samples = Helpers.error_computation(
                    train_data_transformed, est_data_transformed, types_dict)

                # Display logs per epoch step
                if epoch % args.display == 0:
                    print_loss(epoch, start_time, avg_loss / n_batches_train,
                               avg_KL_s / n_batches_train,
                               avg_KL_z / n_batches_train)
                    print("")

            # Plot evolution of test loglik
                loglik_per_variable = np.sum(np.concatenate(log_p_x_total, 1),
                                             1) / n_samples_train

                loglik_epoch.append(loglik_per_variable)

            # -----------------------------------------------------------------------------------#
            # Apply on test data

            for i in range(1):
                samples_test_list = []
                test_params_list = []
                log_p_x_test_list = []
                data_c_list = []

                test_data_counter = out['test_counter'][1]
                test_data_c_counter = out['test_counter'][2]
                y_test_counter = out['test_counter'][3]
                n_samples_test = test_data_counter.shape[0]

                # Create test minibatch
                data_list = Helpers.next_batch(test_data_counter,
                                               types_dict,
                                               n_samples_test,
                                               index_batch=i)
                data_list_c = Helpers.next_batch(test_data_c_counter,
                                                 types_dict_c,
                                                 n_samples_test,
                                                 index_batch=i)  # DONE

                # Constant Gumbel-Softmax parameter (annealing has finished)
                tau = 1e-3

                # Create feed dictionary
                feedDict = {
                    i: d
                    for i, d in zip(tf_nodes['ground_batch'], data_list)
                }
                feedDict.update({
                    i: d
                    for i, d in zip(tf_nodes['ground_batch_c'], data_list_c)
                })
                feedDict[tf_nodes['tau_GS']] = tau
                feedDict[tf_nodes[
                    'batch_size']] = ncounterfactuals  # n_samples_test

                # Get samples from the generator function (computing the mode of all distributions)
                samples_test, log_p_x_test, test_params, theta_test, normalization_params_test, X, delta_kl = session.run(
                    [
                        tf_nodes['samples_test'], tf_nodes['log_p_x_test'],
                        tf_nodes['test_params'], tf_nodes['theta_test'],
                        tf_nodes['normalization_params'], tf_nodes['X'],
                        tf_nodes['delta_kl']
                    ],
                    feed_dict=feedDict)

                samples_test_list.append(samples_test)
                test_params_list.append(test_params)
                log_p_x_test_list.append(log_p_x_test)
                data_c_list.append(data_list_c)

            # Concatenate samples in arrays
            s_total_test, z_total_test, y_total_test, samples_total_test = Helpers.samples_concatenation(
                samples_test_list)

            # Transform discrete variables back to the original values
            est_samples_transformed = Helpers.discrete_variables_transformation(
                samples_total_test, types_dict)

            # -----------------------------------------------------------------------------------#
            # Find k Attainable Counterfactuals
            print('[*] Find Attainable Counterfactuals...')

            counter_batch_size = 1  # counterfactual batch size (i.e. look for counterfactuals one by one)
            data_concat = []
            data_concat_c = []
            counterfactuals = []
            latent_tilde = []
            latent = []

            search_samples = args.search_samples
            p = args.norm_latent_space

            for i in tqdm(range(ncounterfactuals)):

                s = (k, n_input)  # preallocate k spots; # inputs
                sz = (k, args.dim_latent_z)
                s = np.zeros(s)
                sz = np.zeros(sz)
                ik = 0  # counter

                l = 0
                step = args.step_size

                x_adv, y_adv, z_adv, d_adv = None, None, None, None

                #scale test observations
                scaled_test, scaler_test = Helpers.standardize(
                    test_data_counter)

                # get one test observation
                data_list = Helpers.next_batch(test_data_counter,
                                               types_dict,
                                               counter_batch_size,
                                               index_batch=i)
                data_list_c = Helpers.next_batch(test_data_c_counter,
                                                 types_dict_c,
                                                 counter_batch_size,
                                                 index_batch=i)
                hat_y_test = np.repeat(y_test_counter[i] * 1,
                                       search_samples,
                                       axis=0)
                test_data_c_replicated = np.repeat(
                    test_data_c_counter[i, :].reshape(1, -1),
                    search_samples,
                    axis=0)
                replicated_scaled_test = np.repeat(scaled_test[i, :].reshape(
                    1, -1),
                                                   search_samples,
                                                   axis=0)

                # get replicated observations (observation replicated nsamples times)
                #replicated_scaled_test = Helpers.replicate_data_list(data_list_scaled, search_samples)
                replicated_data_list = Helpers.replicate_data_list(
                    data_list, search_samples)
                replicated_data_list_c = Helpers.replicate_data_list(
                    data_list_c, search_samples)
                replicated_z = np.repeat(z_total_test[i].reshape(
                    -1, args.dim_latent_z),
                                         search_samples,
                                         axis=0)

                h = l + step
                # counter to stop
                count = 0
                counter_step = 1
                max_step = 500

                while True:

                    count = count + counter_step

                    if count > max_step:
                        sz = None
                        s = None
                        z = z_total_test[i].reshape(-1, args.dim_latent_z)
                        break

                    if degree_active == 1:  #choose all latent features for search

                        delta_z = np.random.randn(
                            search_samples, replicated_z.shape[1]
                        )  # http://mathworld.wolfram.com/HyperspherePointPicking.html
                        d = np.random.rand(search_samples) * (
                            h - l) + l  # length range [l, h)
                        norm_p = np.linalg.norm(delta_z, ord=p, axis=1)
                        d_norm = np.divide(d, norm_p).reshape(
                            -1, 1)  # rescale/normalize factor
                        delta_z = np.multiply(delta_z, d_norm)
                        z_tilde = replicated_z + delta_z  # z tilde

                    else:

                        delta_z = np.random.randn(
                            search_samples, replicated_z.shape[1]
                        )  # http://mathworld.wolfram.com/HyperspherePointPicking.html
                        d = np.random.rand(search_samples) * (
                            h - l) + l  # length range [l, h)
                        norm_p = np.linalg.norm(delta_z, ord=p, axis=1)
                        d_norm = np.divide(d, norm_p).reshape(
                            -1, 1)  # rescale/normalize factor
                        delta_z = np.multiply(delta_z, d_norm)

                        mask = np.tile(
                            delta_kl[3][0, :] * 1,
                            (search_samples,
                             1))  # only alter most important latent features
                        delta_z = np.multiply(delta_z, mask)

                        z_tilde = replicated_z + delta_z

                    # create feed dictionary
                    feedDict = {
                        i: d
                        for i, d in zip(tf_nodes['ground_batch'],
                                        replicated_data_list)
                    }
                    feedDict.update({
                        i: d
                        for i, d in zip(tf_nodes['ground_batch_c'],
                                        replicated_data_list_c)
                    })
                    feedDict[tf_nodes['samples_z']] = z_tilde
                    feedDict[tf_nodes['tau_GS']] = tau
                    feedDict[tf_nodes['batch_size']] = search_samples

                    theta_perturbed, samples_perturbed = session.run(
                        [
                            tf_nodes['theta_perturbed'],
                            tf_nodes['samples_perturbed']
                        ],
                        feed_dict=feedDict)

                    x_tilde, params_x_perturbed = Evaluation.loglik_evaluation_test(
                        X_list, theta_perturbed, normalization_params_test,
                        types_dict)
                    x_tilde = np.concatenate(x_tilde, axis=1)
                    scaled_tilde = scaler_test.transform(x_tilde)
                    d_scale = np.sum(np.abs(scaled_tilde -
                                            replicated_scaled_test),
                                     axis=1)

                    x_tilde = np.c_[test_data_c_replicated, x_tilde]
                    y_tilde = clf.predict(x_tilde)

                    indices_adv = np.where(y_tilde == 0)[0]

                    if len(indices_adv) == 0:  # no candidate generated
                        l = h
                        h = l + step
                    elif all(s[k - 1, :] == 0):  # fewer than k candidates generated so far

                        indx = indices_adv[np.argmin(d_scale[indices_adv])]
                        assert (y_tilde[indx] != 1)

                        s[ik, :] = x_tilde[indx, :]
                        sz[ik, :] = z_tilde[indx, :]
                        z = z_total_test[i].reshape(-1, args.dim_latent_z)

                        ik = ik + 1  # up the count
                        l = h
                        h = l + step
                    else:  # k candidates generated
                        break

                data_concat.append(np.concatenate(data_list, axis=1))
                data_concat_c.append(np.concatenate(data_list_c, axis=1))
                counterfactuals.append(s)
                latent_tilde.append(sz)
                latent.append(z)

    cchvae_counterfactuals = np.array(counterfactuals)
    return cchvae_counterfactuals
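A standalone sketch of the latent-space perturbation step used in the search above: random directions are rescaled to radii drawn uniformly from [l, h), following the hypersphere point-picking reference cited in the comments:

import numpy as np

def samplePerturbations(search_samples, dim_latent_z, l, h, p=2):
    delta_z = np.random.randn(search_samples, dim_latent_z)   # random directions
    d = np.random.rand(search_samples) * (h - l) + l          # radii in [l, h)
    d_norm = np.divide(d, np.linalg.norm(delta_z, ord=p, axis=1)).reshape(-1, 1)
    return np.multiply(delta_z, d_norm)                       # scaled perturbations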
Example No. 18
def test_recall(y_pred):
    y_true = [0, 1]
    expect = recall_score(y_true, y_pred)
    assert ev.recall(y_true, y_pred) == expect
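For reference, a plain-Python restatement of what recall_score computes for binary labels (illustrative only, not part of the test module):

def manualRecall(y_true, y_pred):
    tp = sum(1 for t, p in zip(y_true, y_pred) if t == 1 and p == 1)
    fn = sum(1 for t, p in zip(y_true, y_pred) if t == 1 and p == 0)
    return float(tp) / (tp + fn) if (tp + fn) else 0.0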
Example No. 19
print("----------------------------------------------------------------------")
print("----------------------------------------------------------------------")
print("")
print(
    f"Recommended songs:\n{personal_recommendations[:top_hits].drop(['user_id', 'score', 'rank'], axis = 1)}"
)
print("----------------------------------------------------------------------")
print(" \n Starting error evaluation\n ")
#Evaluation
start = time.time()

#Define what percentage of users to use for precision recall calculation
user_sample = 0.05

#Instantiate the precision_recall_calculator class
pr = Evaluation.precision_recall_calculator(test_data, train_data, pm,
                                            is_model)

#Call method to calculate precision and recall values
(pm_avg_precision_list, pm_avg_recall_list, ism_avg_precision_list,
 ism_avg_recall_list) = pr.calculate_measures(user_sample)

end = time.time()
print(end - start)

#Visually display the precision and recall quality of both models

print(" \nPlotting precision recall curves.")
plot_precision_recall(pm_avg_precision_list, pm_avg_recall_list,
                      "popularity_model", ism_avg_precision_list,
                      ism_avg_recall_list, "item_similarity_model")
Example No. 20
def Model(Label,Parameters=[]):
    global filepath, filename, fixed_seed_num, sequence_window, number_class, hidden_units, input_dim, learning_rate, epoch, is_multi_scale, training_level, cross_cv, is_add_noise, noise_ratio
    try:
        filepath = Parameters["filepath"]
        filename = Parameters["filename"]
        sequence_window = Parameters["sequence_window"]
        number_class = Parameters["number_class"]
        hidden_units = Parameters["hidden_units"]
        input_dim = Parameters["input_dim"]
        learning_rate = Parameters["learning_rate"]
        epoch = Parameters["epoch"]
        is_multi_scale = Parameters["is_multi_scale"]
        training_level = Parameters["training_level"]
        cross_cv = Parameters["cross_cv"]
        fixed_seed_num = Parameters["fixed_seed_num"]
        is_add_noise = Parameters["is_add_noise"]
        noise_ratio = Parameters["noise_ratio"]
    except:
        pass


    result_list_dict = defaultdict(list)
    evaluation_list = ["ACCURACY","F1_SCORE","AUC","G_MEAN"]
    for each in evaluation_list:
        result_list_dict[each] = []
    np.random.seed(fixed_seed_num)  # for reproducibility
    #num_selected_features = 30
    #num_selected_features = 25#AS leak tab=0
    #num_selected_features = 32#Slammer tab=0
    num_selected_features = 33#Nimda tab=1
    for tab_cv in range(cross_cv):

        if tab_cv != 0: continue
        epoch_training_loss_list = []
        epoch_val_loss_list = []
        #print(is_multi_scale)

        #using MLP to train
        if Label == "SVM":
            x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename,
                                                                                            sequence_window, tab_cv,
                                                                                            cross_cv,
                                                                                            Multi_Scale=is_multi_scale,
                                                                                            Wave_Let_Scale=training_level,
                                                                                            Normalize=0)

            print(Label+" is running..............................................")
            y_train = y_train0
            clf = svm.SVC(kernel="rbf", gamma=0.00001, C=100000,probability=True)
            print(x_train.shape)
            clf.fit(x_train, y_train)
            result = clf.predict_proba(x_test)
            #return Evaluation.Evaluation(y_test, result)
            #results = Evaluation.Evaluation(y_test, result)

        elif Label == "SVMF":
            x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename,
                                                                                            sequence_window, tab_cv,
                                                                                            cross_cv,
                                                                                            Multi_Scale=is_multi_scale,
                                                                                            Wave_Let_Scale=training_level,
                                                                                            Normalize=5)

            print(Label+" is running..............................................")
            clf = svm.SVC(kernel="rbf", gamma=0.00001, C=100000,probability=True)
            print(x_train.shape)
            #x_train_new = SelectKBest(f_classif, k=num_selected_features).fit_transform(x_train, y_train0)
            #x_test_new = SelectKBest(f_classif, k=num_selected_features).fit_transform(x_test, y_test0)

            clf.fit(x_train, y_train0)
            result = clf.predict_proba(x_test)
            #return Evaluation.Evaluation(y_test, result)
            #results = Evaluation.Evaluation(y_test, result)
        elif Label == "SVMW":
            x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename,
                                                                                            sequence_window, tab_cv,
                                                                                            cross_cv,
                                                                                            Multi_Scale=is_multi_scale,
                                                                                            Wave_Let_Scale=training_level,
                                                                                            Normalize=6)

            print(Label + " is running..............................................")
            #SVR(kernel="linear") = svm.SVC(kernel="rbf", gamma=0.00001, C=100000, probability=True)
            estimator = svm.SVC(kernel="linear",probability=True)
            selector = RFE(estimator, num_selected_features, step=1)
            selector = selector.fit(x_train, y_train0)

            result = selector.predict_proba(x_test)
            # return Evaluation.Evaluation(y_test, result)
            # results = Evaluation.Evaluation(y_test, result)
        elif Label == "NBF":

            x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename,
                                                                                            sequence_window, tab_cv,
                                                                                            cross_cv,
                                                                                            Multi_Scale=is_multi_scale,
                                                                                            Wave_Let_Scale=training_level,
                                                                                            Normalize=10)

            print(Label + " is running..............................................")
            clf = MultinomialNB()
            clf.fit(x_train, y_train0)
            result = clf.predict_proba(x_test)


        elif Label == "NBW":
            x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename,
                                                                                            sequence_window, tab_cv,
                                                                                            cross_cv,
                                                                                            Multi_Scale=is_multi_scale,
                                                                                            Wave_Let_Scale=training_level,
                                                                                            Normalize=11)

            print(Label + " is running..............................................")
            #SVR(kernel="linear") = svm.SVC(kernel="rbf", gamma=0.00001, C=100000, probability=True)
            estimator = MultinomialNB()
            selector = RFE(estimator, num_selected_features, step=1)
            selector = selector.fit(x_train, y_train0)

            result = selector.predict_proba(x_test)
            # return Evaluation.Evaluation(y_test, result)
            # results = Evaluation.Evaluation(y_test, result)
        elif Label == "NB":
            x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename,
                                                                                            sequence_window, tab_cv,
                                                                                            cross_cv,
                                                                                            Multi_Scale=is_multi_scale,
                                                                                            Wave_Let_Scale=training_level,
                                                                                            Normalize=1)

            print(Label+" is running..............................................")
            y_train = y_train0
            clf = MultinomialNB()
            clf.fit(x_train, y_train)
            result = clf.predict_proba(x_test)

            #return Evaluation.Evaluation(y_test, result)
            #results = Evaluation.Evaluation(y_test, result)

        elif Label == "DT":
            x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename,
                                                                                            sequence_window, tab_cv,
                                                                                            cross_cv,
                                                                                            Multi_Scale=is_multi_scale,
                                                                                            Wave_Let_Scale=training_level,
                                                                                            Normalize=2)

            print(Label+" is running.............................................."+str(x_train.shape))
            y_train = y_train0
            clf = tree.DecisionTreeClassifier()
            clf.fit(x_train, y_train)
            result = clf.predict_proba(x_test)

            #return Evaluation.Evaluation(y_test, result)
            #results = Evaluation.Evaluation(y_test, result)
        elif Label == "Ada.Boost":
            x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename,
                                                                                            sequence_window, tab_cv,
                                                                                            cross_cv,
                                                                                            Multi_Scale=is_multi_scale,
                                                                                            Wave_Let_Scale=training_level,
                                                                                            Normalize=0)

            print(Label+" is running.............................................."+str(x_train.shape))
            y_train = y_train0
            #clf = AdaBoostClassifier(n_estimators=10) #Nimda tab=1
            clf = AdaBoostClassifier(n_estimators=10)

            clf.fit(x_train, y_train)
            result = clf.predict_proba(x_test)

            #return Evaluation.Evaluation(y_test, result)
            #results = Evaluation.Evaluation(y_test, result)
        elif Label == "MLP":
            x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData_WithoutS(is_add_noise,noise_ratio,filepath, filename,
                                                                                            sequence_window, tab_cv,
                                                                                            cross_cv,
                                                                                            Multi_Scale=is_multi_scale,
                                                                                            Wave_Let_Scale=training_level,
                                                                                            Normalize=0)

            print(Label+" is running..............................................")
            batch_size = len(y_train)
            start = time.clock()
            model = Sequential()
            model.add(Dense(hidden_units, activation="relu", input_dim=33))

            model.add(Dense(output_dim=number_class))
            model.add(Activation("sigmoid"))
            # model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
            model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

            model.fit(x_train, y_train, batch_size=batch_size, nb_epoch=epoch)
            #result = model.predict(X_Testing, batch_size=batch_size)
            result = model.predict(x_test)
            end = time.clock()
            print("The Time For MLP is " + str(end - start))

            #return Evaluation.Evaluation(y_test, result)
            #results = Evaluation.Evaluation(y_test, result)

        #elif Label == "SVM-S":
            #x_train, y_train, y_train0, x_test, y_test, y_test0 = LoadData.GetData('Attention',filepath,filename,sequence_window,tab_cv,cross_cv)
            #x_train,y_train = Manipulation(x_train,y_train0,sequence_window)
            #x_test, y_test = Manipulation(x_test, y_test0, sequence_window)
            #clf = svm.SVC(kernel="rbf")
            #clf.fit(x_train, y_train)
            #result = clf.predict(x_test)
            #results = Evaluation.Evaluation_WithoutS(y_test, result)
        elif Label == "RNN":
            print(Label+" is running..............................................")
            start = time.clock()
            x_train_multi_list, x_train, y_train, x_testing_multi_list, x_test, y_test = LoadData.GetData(is_add_noise,noise_ratio,'Attention',
                                                                                                          filepath,
                                                                                                          filename,
                                                                                                          sequence_window,
                                                                                                          tab_cv,
                                                                                                          cross_cv,
                                                                                                          Multi_Scale=is_multi_scale,
                                                                                                          Wave_Let_Scale=training_level)

            batch_size = len(y_train)
            rnn_object = SimpleRNN(hidden_units, input_length=len(x_train[0]), input_dim=input_dim)
            model = Sequential()

            model.add(rnn_object)  # X.shape is (samples, timesteps, dimension)
            #model.add(Dense(30, activation="relu"))
            #model.add(Dropout(0.2))
            model.add(Dense(30, activation="sigmoid"))
            #model.add(Dropout(0.3))
            # model.add(Dense(5,activation="tanh"))

            model.add(Dense(output_dim=number_class))
            model.add(Activation("sigmoid"))
            # model.add(Activation("softmax"))

            # model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
            model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

            model.fit(x_train, y_train, batch_size=batch_size, nb_epoch=epoch)

            #result = model.predict(X_Testing, batch_size=batch_size)

            result = model.predict(x_test)

            #return Evaluation.Evaluation(y_test, result)
            #results = Evaluation.Evaluation(y_test, result)

            end = time.clock()
            print("The Time For RNN is " + str(end - start))

            # print(result)
        elif Label == "LSTM":
            print(Label+" is running..............................................")
            start = time.clock()
            x_train_multi_list, x_train, y_train, x_testing_multi_list, x_test, y_test = LoadData.GetData(is_add_noise,noise_ratio,'Attention',filepath,
                                                                                                          filename,
                                                                                                          sequence_window,
                                                                                                          tab_cv,
                                                                                                          cross_cv,
                                                                                                          Multi_Scale=is_multi_scale,
                                                                                                          Wave_Let_Scale=training_level)

            batch_size = len(y_train)

            lstm_object = LSTM(hidden_units, input_length=len(x_train[0]), input_dim=input_dim)
            model = Sequential()

            model.add(lstm_object)  # X.shape is (samples, timesteps, dimension)
            # model.add(LSTM(lstm_size,return_sequences=True,input_shape=(len(X_Training[0]),33)))
            # model.add(LSTM(100,return_sequences=True))
            # model.add(Dense(10, activation="tanh"))
            # model.add(Dense(5,activation="tanh"))
            model.add(Dense(30, activation="relu"))
            #model.add(Dropout(0.2))

            #model.add(Dense(30, activation="sigmoid"))
            #model.add(Dropout(0.3))
            # model.add(Dense(5,activation="tanh"))

            model.add(Dense(output_dim=number_class))
            model.add(Activation("sigmoid"))
            #model.add(Activation("softmax"))

            # model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
            model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

            model.fit(x_train, y_train, batch_size=batch_size, nb_epoch=epoch)

            #result = model.predict(X_Testing, batch_size=batch_size)

            result = model.predict(x_test)

            end = time.clock()
            print("The Time For LSTM is " + str(end - start))

        if len(Parameters) > 0:
            return Evaluation.Evaluation(y_test, result)#Plotting AUC

        results = Evaluation.Evaluation(y_test, result)# Computing ACCURACY,F1-score,..,etc
        print(results)
        y_test2 = np.array(Evaluation.ReverseEncoder(y_test))
        result2 = np.array(Evaluation.ReverseEncoder(result))
        print("---------------------------1111111111111111")
        with open("StatFalseAlarm_"+filename+"_True.txt","w") as fout:
            for tab in range(len(y_test2)):
                fout.write(str(int(y_test2[tab]))+'\n')
        with open("StatFalseAlarm_"+filename+"_"+Label+"_"+"_Predict.txt","w") as fout:
            for tab in range(len(result2)):
                fout.write(str(int(result2[tab]))+'\n')
        print(result2.shape)
        print("---------------------------22222222222222222")

        for each_eval, each_result in results.items():
            result_list_dict[each_eval].append(each_result)

    for eachk, eachv in result_list_dict.items():
        result_list_dict[eachk] = np.average(eachv)
    #print(result_list_dict)
    if not is_add_noise:
        with open(os.path.join(os.getcwd(), "Comparison_Log_" + filename + ".txt"), "a") as fout:
            outfileline = Label+":__"
            fout.write(outfileline)
            for eachk,eachv in result_list_dict.items():
                fout.write(eachk+": "+str(round(eachv,3))+",\t")
            fout.write('\n')
    else:
        with open(os.path.join(os.getcwd(), "Comparison_Log_Adding_Noise_" + filename + ".txt"), "a") as fout:
            outfileline = Label+":__"+"Noise_Ratio_:"+str(noise_ratio)
            fout.write(outfileline)
            for eachk,eachv in result_list_dict.items():
                fout.write(eachk+": "+str(round(eachv,3))+",\t")
            fout.write('\n')

    return results
Example No. 21
                                f.close()

                                eva_start_time = time.time()
                                ini = Initialization(dataset_name,
                                                     product_name)
                                iniW = IniWallet(dataset_name, product_name,
                                                 wallet_distribution_type)

                                seed_cost_dict = ini.constructSeedCostDict()
                                graph_dict = ini.constructGraphDict(
                                    cascade_model)
                                product_list = ini.constructProductList()
                                num_product = len(product_list)
                                wallet_dict = iniW.constructWalletDict()

                                eva = Evaluation(graph_dict, product_list,
                                                 ppp_strategy, True)
                                personal_prob_dict = eva.setPersonalPurchasingProbDict(
                                    wallet_dict)

                                print('@ ' + model_name +
                                      ' evaluation @ dataset_name = ' +
                                      dataset_name + '_' + cascade_model +
                                      ', product_name = ' + product_name +
                                      ', wd = ' + wallet_distribution_type +
                                      ', ppp = ' + ppp_strategy)

                                result10_pro_list = []
                                for _ in range(100):
                                    pro, pro_k_list, pnn_k_list = eva.getSeedSetProfit(
                                        seed_set, copy.deepcopy(wallet_dict),
                                        copy.deepcopy(personal_prob_dict))
Exemplo n.º 22
0
    opt = Oger.evaluation.Optimizer(gridsearch_parameters, evaluationFunction)
    
    
    #===========================================================================
    # The following two lines enable parallel training with two worker
    # processes; comment them out to train in a single process.
    #===========================================================================
    opt.scheduler = mdp.parallel.ProcessScheduler(n_processes=2, verbose=False)
    mdp.activate_extension("parallel")
    
    #Start gridsearch using nFolds and cross validation
    opt.grid_search(data, flow, n_folds=nFolds, cross_validate_function=Oger.evaluation.n_fold_random)
    

    
    #Plot minimum errors        
    Evaluation.plotMinErrors(opt.errors, opt.parameters, opt.parameter_ranges, pp)
    
    
    #Plot error space along three axes:
    axisOne = -1
    axisTwo = -1
    axisThree = -1
    for i, (node, param) in enumerate(opt.parameters):
        if param == 'spectral_radius':
            axisOne = i
        elif param == 'leak_rate':
            axisTwo = i
        elif param == 'ridge_param':
            axisThree = i
Exemplo n.º 23
0
myNet20 = net.Dense()
myNet20.AddLayer(5, isInput=True)
myNet20.AddLayer(20, activationFunction=af.Sigmoid)
myNet20.AddLayer(1, activationFunction=af.Sigmoid)

start_time = time.time()
myNet5.Train(dataset, target, iterationCount= 10, learningRateStart=.6, learningRateEnd=.0, regulationRate=.02)
train5_Time = time.time() - start_time
start_time = time.time()
myNet20.Train(dataset, target, iterationCount= 10, learningRateStart=.6, learningRateEnd=.0, regulationRate=.02)
train20_Time = time.time() - start_time

# **************************************** Print Metrics *************************************************************
from sklearn.metrics import mean_squared_error
t5, o5= ev.GetPredictions(myNet5, test_dataset, test_target)
t20, o20= ev.GetPredictions(myNet20, test_dataset, test_target)
print(" ")
print("---------------------------------------------------------------------------------")
print("-------------------------------- airfoil_self_noise -----------------------------")

print(">>>>>>>>>>>>> 5 hidden units")
print("mean_squared_error : {}".format(round(mean_squared_error(t5, o5),6))) 
print("Root_MSE           : {}".format(round(math.sqrt(mean_squared_error(t5, o5)),6)))     
print("Training Time      : {} Seconds".format(round(train5_Time,3)))
print("-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -")
print(">>>>>>>>>>>>> 20 hidden units")
print("mean_squared_error : {}".format(round(mean_squared_error(t20, o20),6)))
print("Root_MSE           : {}".format(round(math.sqrt(mean_squared_error(t20, o20)),6)))
print("Training Time      : {} Seconds".format(round(train20_Time,3)))
Exemplo n.º 24
0
    def __init__(self, generate_prompts):
        configs = []

        if not Settings.config.has_option("DEFAULT", "domains"):
            logger.error(
                "You must specify the domains under the DEFAULT section of the config"
            )
        domains = Settings.config.get("DEFAULT", 'domains')
        logger.info('--Simulating dialogues over the domains: ' + domains)
        self.possible_domains = domains.split(',')
        DomainUtils.checkDomainStrings(domainStrings=self.possible_domains)

        self.maxTurns = 30
        if Settings.config.has_option("simulate", "maxturns"):
            configs.append('maxturns')
            self.maxTurns = Settings.config.getint("simulate", "maxturns")
        self.forceNullPositive = False
        if Settings.config.has_option("simulate", "forcenullpositive"):
            configs.append('forcenullpositive')
            self.forceNullPositive = Settings.config.getboolean(
                "simulate", "forcenullpositive")
        conf_scorer_name = 'additive'
        if Settings.config.has_option('simulate', 'confscorer'):
            conf_scorer_name = Settings.config.get('simulate', 'confscorer')

        if Settings.config.has_section('simulate'):
            for opt in Settings.config.options('simulate'):
                if opt not in configs and opt not in Settings.config.defaults():
                    logger.error('Invalid config: ' + opt)

        # [MultiDomain?] Dialogue Management/policy.
        #-----------------------------------------
        self.topic_manager = TopicManager.TopicManager()

        # Simulated User.
        #-----------------------------------------
        # TODO - deal with multi domain simulation - whilst changing Settings.py ill just pass domain here for now
        logger.debug(
            'simulate.py -- XXXXXXX -- directly passing domain name in simulate at present...'
        )
        self.simulator = UserSimulator.SimulatedUsersManager(
            domainStrings=self.possible_domains)

        # Error Simulator.
        #-----------------------------------------
        # TODO - it is a hack for now passing the domain string directly from config via self.possible_domains. look at this
        #self.errorSimulator = ErrorSimulator.CuedErrorSimulator(conf_scorer_name, domainString=self.possible_domains[0])
        self.errorSimulator = ErrorSimulator.SimulatedErrorManager(
            conf_scorer_name, self.possible_domains)

        # SemO.
        #-----------------------------------------
        self.semoClass = None
        if generate_prompts:
            self.semo_name = 'PassthroughSemO'
            if Settings.config.has_option('hub', 'semo'):
                self.semo_name = Settings.config.get('hub', 'semo')
            # SemO.
            if self.semo_name == 'PassthroughSemO':
                self.semoClass = SemO.PassthroughSemO()
            elif self.semo_name == 'BasicSemO':
                self.semoClass = SemO.BasicSemO()
            else:
                logger.warning('Invalid SemO: %s. Using PassthroughSemO.' %
                               self.semo_name)
                self.semoClass = SemO.PassthroughSemO()

        # Evaluation Manager.
        #-----------------------------------------
        self.evaluator = Evaluation.EvaluationManager(self.possible_domains)
Exemplo n.º 25
0
from Evaluation import *
from DictRelevant import *
import time

startPoint=[".A",".T",".W",".B"]
fileDoc="cisi/CISI.ALL"
fileStopList="cacm/common_words"
fileQuerry="cisi/CISI.QRY"
fileRel='cisi/CISI.REL'
sim_func_dict={1:'product',2:'product_log',3:'cosinus',4:'cosinus_log',5:'langue',6:'langue_corr',7:'BM25'}
ind_sim_func=4
if __name__=="__main__":
	db= DictBase(startPoint)
	db.execute(fileDoc, fileStopList)
	# print db.word_dict['recognition']  #2634   OK
	#print db.tf_mat[2634]     #OK
	'''
	{396: 1, 653: 1, 1426: 1, 1044: 1, 89: 1, 474: 2, 927: 1, 1458: 2, 797: 2, 48: 1, 1202: 2, 94: 1, 568: 2, 1337: 1, 799: 1, 1341: 2, 908: 1, 1421: 1, 601: 1, 858: 1, 861: 1, 990: 1, 863: 1, 102: 2, 108: 1, 1134: 1, 495: 1, 241: 1, 890: 3, 1403: 1, 895: 1}
	'''
	qr=Querry(startPoint,db.word_dict)
	qr.readQuerry(fileQuerry)

	sim =similarity(db, qr.querry_dict,ind_sim_func)
	dr=DictRelevant()
	dr.readRelevantQuerry(fileRel)
	print "finish readRel Dictionary"
	sim_func=sim_func_dict[ind_sim_func]
	eva= Evaluation(db, dr, -1, getattr(sim,sim_func))
	#eva= Evaluation(db, dr, 100, sim.cosinus )
	print eva.mean_avg_prec()
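
eva.mean_avg_prec() is defined in the Evaluation module, which is not shown here; as a rough sketch, assuming the usual definition of mean average precision over the ranked documents returned for each query (argument names below are hypothetical), it resembles:

def mean_average_precision(rankings, relevant):
    # rankings: {query_id: [doc_id, ...] sorted by decreasing similarity}
    # relevant: {query_id: set of relevant doc_ids from the .REL file}
    average_precisions = []
    for qid, ranked_docs in rankings.items():
        rel = relevant.get(qid, set())
        if not rel:
            continue
        hits = 0
        precisions_at_hits = []
        for rank, doc in enumerate(ranked_docs, start=1):
            if doc in rel:
                hits += 1
                precisions_at_hits.append(hits / float(rank))
        average_precisions.append(sum(precisions_at_hits) / len(rel))
    return sum(average_precisions) / float(len(average_precisions))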
Exemplo n.º 26
0
You can also use ALT+SHIFT+E to execute single lines or whole code fragments.
I also recommend the PyCharm cell mode plugin for easier execution of code fragments.
(Noam)
"""

## The cell separator

from BabelfyTester import BabelfyTester
from DbWrapper import *
from Evaluation import *
from ModelTrainer import *

##

path = ".."
print "Loading iterators+db cache..."
if(not os.path.isdir(path)):
    path = "C:\\Users\\Noam\\Documents\\GitHub\\DeepProject"


wikiDB = WikipediaDbWrapper(user='******', password='******', database='wiki20151002')
wikiDB.cacheArticleTable()
iter_eval = WikilinksNewIterator(path+"/data/wikilinks/small/evaluation")

babelfy_model = BabelfyTester(wikiDB, path + "/data/wikilinks/babelfy")
evaluation = Evaluation(iter_eval,babelfy_model)
try:
    evaluation.evaluate()
except:
    print "nothing to do"
babelfy_model.finalizeWriter()
Exemplo n.º 27
0
    train_df = pd.read_csv('data_2/u1.base', header=None)
    train_df.columns = ['userId', 'movieId', 'rating', 'time']
    train_df.drop('time', axis=1, inplace=True)

    test_df = pd.read_csv('data_2/u1.test', header=None)
    test_df.columns = ['userId', 'movieId', 'rating', 'time']
    test_df.drop('time', axis=1, inplace=True)
elif dataset == '1M':
    train_df = pd.read_csv('data_2/train.csv', header=0)
    train_df.columns = ['userId', 'movieId', 'rating']

    test_df = pd.read_csv('data_2/test.csv', header=0)
    test_df.columns = ['userId', 'movieId', 'rating']

# Ranking (Precision-Recall)
prec, recall = Evaluation.precision_recall(user_recs, train_df.values.tolist(), test_df.values.tolist(), at_top_n)
print 'Precision = %.4f\nRecall = %.4f' % (prec, recall)


# Rating Prediction
preds, actuals = recommender.predict_test(user_knn_model, test_df.values.tolist())
rmse = Evaluation.rmse(preds, actuals)
print 'User kNN rmse', rmse

# preds, actuals = recommender.predict_test(item_knn_model, test_df.values.tolist())
# rmse = Evaluation.rmse(preds, actuals)
# print 'Item kNN rmse', rmse

import numpy as np
print 'mean predictions', np.mean(preds)
print 'mean actual', np.mean(actuals)
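
Evaluation.rmse is not shown in this snippet; assuming it computes the standard root-mean-square error over the (prediction, actual) rating pairs, a minimal equivalent sketch is:

import numpy as np

def rmse(preds, actuals):
    preds = np.asarray(preds, dtype=float)
    actuals = np.asarray(actuals, dtype=float)
    # square root of the mean squared difference between predicted and actual ratings
    return np.sqrt(np.mean((preds - actuals) ** 2))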
Exemplo n.º 28
0
def test_accuracy(y_pred):
    y_true = [0, 1]
    expect = accuracy_score(y_true, y_pred)
    assert ev.accuracy(y_true, y_pred) == expect
Exemplo n.º 29
0
WNS.simulationModel.nodes = [varp] + WNS.simulationModel.nodes

vdhcp = VirtualDHCPServer("vDHCP@",
                          "theOnlySubnet",
                          "192.168.0.2", "192.168.254.253",
                          "255.255.0.0")
WNS.simulationModel.nodes.append(vdhcp)

vdns = VirtualDNSServer("vDNS", "ip.DEFAULT.GLOBAL")
WNS.simulationModel.nodes.append(vdns)


# Configure probes for evaluation
Evaluation.installEvaluation(sim = WNS,
                             loggingStations = range(1, configuration.numberOfStations + 1),
                             dll = WNS.simulationModel.nodes[1].dll,
                             maxPacketDelay = 0.5,     # s
                             maxPacketSize = 2000*8,   # Bit
                             maxBitThroughput = 10E6,  # Bit/s
                             maxPacketThroughput = 1E6, # Packets/s
                             delayResolution = 1000,
                             sizeResolution = 2000,
                             throughputResolution = 10000)

node = openwns.evaluation.createSourceNode(WNS, "glue.phyTrace") 
node.getLeafs().appendChildren(
    openwns.evaluation.JSONTrace(key="__json__", description="JSON testing in PhyUser"))

#openwns.evaluation.default.installEvaluation(sim = WNS)
openwns.setSimulator(WNS)
Exemplo n.º 30
0
parser.add_argument('--Style','-sty',type=str,help='Style for training the network [default: Full]'
                                                     ' [options: Plain, Full]',default='Full')
parser.add_argument('--Network','-net',type=str,help='Network used for training the network [default: DGCNN]'
                                                     ' [options: DGCNN, PointNet++(not supported yet)]',default='DGCNN')
args = parser.parse_args()

##### Set specified GPU to be active
if args.GPU != -1:
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.GPU)

##### Load Training/Testing Data
Loader = IO.ShapeNetIO('./Dataset/ShapeNet',batchsize = args.batchsize)
Loader.LoadTrainValFiles()

##### Evaluation Object
Eval = Evaluation.Eval()

## Number of categories
PartNum = Loader.NUM_PART_CATS
output_dim = PartNum
ShapeCatNum = Loader.NUM_CATEGORIES

#### Export results Directories
if args.ExpRslt:
    dt = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")  # get current time
    BASE_PATH = os.path.expanduser('./Results/ShapeNet/{}_sty-{}_m-{}_{}'.format(args.Network, args.Style, args.m, dt))
    SUMMARY_PATH = os.path.join(BASE_PATH, 'Summary')
    PRED_PATH = os.path.join(BASE_PATH, 'Prediction')
    CHECKPOINT_PATH = os.path.join(BASE_PATH, 'Checkpoint')

    if not os.path.exists(BASE_PATH):
Exemplo n.º 31
0
        # creating a csv writer object
        csvwriter = csv.writer(csvfile)

        # writing the fields
        csvwriter.writerow(fields)

        # writing the data rows
        csvwriter.writerows(rows)
    return None


if __name__ == '__main__':
    # arr = {1:2,3:4,5:1}
    # print(max(arr.values()))
    preProcess = PreProcess.PreProcess()
    eval = Evaluation.Evaluation()
    #
    for n in range(40, 60, 2):
        kmeans = Kmaens.Kmeans(n, preProcess.vectorize_tf_idf())
        print(eval.purity(n, kmeans.y, preProcess.labels))

    # data_vectors_tf_idf = preProcess.vectorize_tf_idf()
    # data_vectors_wv = preProcess.word2wec()
    # optimal_n = len(set(preProcess.labels))

    # # Gaussian Mixture Model
    # print("Gaussian Mixture Model(tf-idf):")
    # gmm = GMMCluster.GMMCluster(data_vectors_tf_idf[:100], 5)
    # cluster = gmm.cluster("tf-idf")
    # print("ARI= ", eval.adjusted_rand_index(preProcess.labels[:100], cluster))
    # print("NMI= ", eval.normalized_mutual_information(preProcess.labels[:100], cluster))
Exemplo n.º 32
0
                f.write(str(seed))
                f.write('\n')

    def load_seeds(self, seeds_path):
        seeds = []
        with open(seeds_path, 'r') as f:
            # one seed per line, matching the format written above
            for line in f.read().splitlines():
                seeds.append(line)
        return seeds

    def draw_graph(self):
        pos = nx.spring_layout(self.graph)
        edge_labels = dict([((
            u,
            v,
        ), d['weight']) for u, v, d in self.graph.edges(data=True)])
        nx.draw_networkx_edge_labels(self.graph, pos, edge_labels=edge_labels)
        nx.draw(self.graph, pos=pos, node_size=100, arrows=True)
        plt.show()


if __name__ == '__main__':
    excofim = extended_CoFIM('../weighted_directed_nets/network.dat',
                             '../weighted_directed_nets/community.dat', 0,
                             1000)
    seeds = excofim.node_expansion(50, 3)
    print seeds
    inf = Evaluation.monte_carlo_extend2(excofim, list(seeds), num_simu=100)
    print "Total influence:", inf
    # excofim.draw_graph()
Exemplo n.º 33
0
def test_precision(y_pred):
    y_true = [0, 1]
    expect = precision_score(y_true, y_pred)
    assert ev.precision(y_true, y_pred) == expect
Exemplo n.º 34
0
def test_confusion_matrix_multi(y_pred):
    y_true = [0, 1, 2]
    expect = confusion_matrix(y_true, y_pred, labels=y_true).flatten().tolist()
    assert ev.confusion_matrix(y_true, y_pred, lbl=y_true) == expect
Exemplo n.º 35
0
        ###--------------------DEBUG STATEMENTS----------------------
        #print cat , cat_num_docs[cat]/len(trainset)
        ###--------------------DEBUG STATEMENTS----------------------
        neg_log_prob=-log(cat_num_docs[cat]/len(trainset))
        word_dict = cat_word_dict[cat]
        count_cat = cat_word_count_dict[cat]
        for w in list_words:
            count_word_train=word_dict.get(w,0)
            ratio = (count_word_train+1)/(count_cat+vocab_length)
            neg_log_prob-=log(ratio)           
                         
        if minimum_neg_log_prob>neg_log_prob:
            min_category=cat
            minimum_neg_log_prob=neg_log_prob

    li_results.append((file_name,min_category,f2c(corpus,file_name)))

###--------------------DEBUG STATEMENTS----------------------
#for t in li_results:
 #   print t    
###--------------------DEBUG STATEMENTS----------------------
    
if binary_classification:
    Evaluation.evaluation_binary(li_results)
else:
    Evaluation.evaluation_multi_class(li_results,cat_num_docs.keys())


print "The time taken by the trained classifier to assign labels"
print time.time() - start_time, "seconds"
Exemplo n.º 36
0
    def Evaluation_task(self):
        # Evaluation Code
        evaluate = Evaluation.Evaluation()
        evaluate.create_files()
Exemplo n.º 37
0
    K = env.nbArms
    policies = [UCB(K, trunc), UCBV(K, trunc), klUCB(K, trunc), klUCB(K, klucb=klucbPoisson), KLempUCB(K, trunc)]
else:
    # Third scenario: Truncated exponential distributions
    trunc = 10
    env = MAB([Exponential(1./p, trunc) for p in range(1, 6)])
    K = env.nbArms
    policies = [UCB(K, trunc), UCBV(K, trunc), klUCB(K, trunc), klUCB(K, klucb=klucbExp), KLempUCB(K, trunc)]

tsav = int_(linspace(100,horizon-1,200))

if graphic == 'yes':
    figure(1)

for k, policy in enumerate(policies):
    ev = Evaluation(env, policy, nbRep, horizon, tsav)
    print ev.meanReward()
    print ev.meanNbDraws()
    meanRegret = ev.meanRegret()
    if graphic == 'yes':
        semilogx(1+tsav, meanRegret, color=colors[k])
        xlabel('Time')
        ylabel('Regret')

if graphic == 'yes':
    legend([policy.__class__.__name__ for policy in policies], loc=0)
    title('Average regret for various policies')
    show()
Exemplo n.º 38
0
def test_f1(y_pred):
    y_true = [0, 1]
    expect = f1_score(y_true, y_pred)
    assert ev.f1(y_true, y_pred) == expect
Exemplo n.º 39
0
    maxhumid = condprob(day.humidMax, noneVars[5], noneMeans[5])
    minhumid = condprob(day.humidMin, noneVars[6], noneMeans[6])
    pressure = condprob(day.pressure, noneVars[7], noneMeans[7])
    meanwind = condprob(day.meanWindSpeed, noneVars[8], noneMeans[8])
    maxwind = condprob(day.maxWindSpeed, noneVars[9], noneMeans[9])
    maxgust = condprob(day.maxGustSpeed, noneVars[10], noneMeans[10])
    visibility = condprob(day.visibility, noneVars[11], noneMeans[11])

    #Calculate the entire probability for a none day
    noneprobability = priornone * meantemp * maxtemp * mintemp * dewpoint * meanhumid * maxhumid * minhumid * pressure * meanwind * maxwind * maxgust * visibility
    #Take the max value of the probabilities - the highest one is the prediction

    if (max(noneprobability, rainprobability, snowprobability,
            fogprobability) == noneprobability):
        predictions.append("None")
    elif (max(noneprobability, rainprobability, snowprobability,
              fogprobability) == rainprobability):
        predictions.append("Rain")
    elif (max(noneprobability, rainprobability, snowprobability,
              fogprobability) == snowprobability):
        predictions.append("Snow")
    elif (max(noneprobability, rainprobability, snowprobability,
              fogprobability) == fogprobability):
        predictions.append("Fog")

###End of testing#######

# Output
print "\nPrediction accuracy = %d%%\n" % Evaluation.evaluate(
    testingData, predictions)
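
The if/elif chain above simply takes the argmax over the four class posteriors; an equivalent, more compact form (a sketch with a hypothetical helper name) would be:

def most_likely_weather(noneprobability, rainprobability, snowprobability, fogprobability):
    # pick the class whose posterior probability is largest
    class_probs = {"None": noneprobability, "Rain": rainprobability,
                   "Snow": snowprobability, "Fog": fogprobability}
    return max(class_probs, key=class_probs.get)

Inside the testing loop, predictions.append(most_likely_weather(...)) would then replace the four branches.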
Exemplo n.º 40
0
def Model(each_case,Label,Parameters=[]):
    global filepath, filename, fixed_seed_num, sequence_window, number_class, hidden_units, input_dim, learning_rate, epoch, is_multi_scale, training_level, cross_cv, wave_type, is_add_noise, noise_ratio, pooling_type,corss_val_label

    try:
        filepath = Parameters["filepath"]
        filename = Parameters["filename"]
        sequence_window = Parameters["sequence_window"]
        number_class = Parameters["number_class"]
        hidden_units = Parameters["hidden_units"]
        input_dim = Parameters["input_dim"]
        learning_rate = Parameters["learning_rate"]
        epoch = Parameters["epoch"]
        training_level = Parameters["training_level"]
        cross_cv = Parameters["cross_cv"]
        fixed_seed_num = Parameters["fixed_seed_num"]
        wave_type = Parameters["wave_type"]
        is_add_noise = Parameters["is_add_noise"]
        is_multi_scale = Parameters["is_multi_scale"]
        noise_ratio = Parameters["noise_ratio"]
        pooling_type = Parameters["pooling_type"]
    except:
        pass


    result_list_dict = defaultdict(list)
    evaluation_list = ["ACCURACY","F1_SCORE","AUC","G_MEAN"]
    for each in evaluation_list:
        result_list_dict[each] = []



    for tab_cv in range(cross_cv):
        if tab_cv != corss_val_label: continue
        print("******************************"+str(tab_cv))
        #if corss_val_label == False:
            #if 'Nimda' in filename:
                #if not tab_cv == 1: continue
            #else:
            #if not tab_cv == 1 :continue#AS Leak, Code Red I, Slammer
        #else:
            #pass

        x_train, y_train,x_test, y_test = LoadData.GetData(pooling_type,is_add_noise,noise_ratio,'Attention',filepath, filename, sequence_window,tab_cv,cross_cv,Multi_Scale=is_multi_scale,Wave_Let_Scale=training_level,Wave_Type=wave_type)

        batch_size = min(len(y_train),len(y_test))
        #batch_size = Parameters["batch_size"]
        #x_train = x_train_multi_list
        #x_test = x_testing_multi_list

        #batch_size = 10
        if Label == "MS-LSTM":
            tf.reset_default_graph()
            tf.set_random_seed(fixed_seed_num)

            num_neurons = hidden_units
            # Network building
            if is_multi_scale == True and each_case == 2:
                #umber_scale_levels = training_level
                #u_w = tf.Variable(tf.random_normal(shape=[1,number_scale_levels]), name="u_w")
                #data_original_train = tf.placeholder(tf.float32,[number_scale_levels,batch_size,sequence_window,input_dim])
                #output_data_original_train = tf.Print(data_original_train,[data_original_train],"The Original Train  is :",first_n=4096,summarize=40)
                #data_original_train2 = tf.transpose(data_original_train,[1,2,3,0])
                #data_original_train_merged = batch_vm2(data_original_train2,tf.transpose(u_w_scales_normalized))
                #data_original_train_merged = tf.reshape(data_original_train_merged,(batch_size,sequence_window,input_dim))
                #lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_neurons, forget_bias=1.0, activation=tf.nn.tanh)
                #val_list, state_list = [tf.nn.dynamic_rnn(lstm_cell, tf.gather(data_original_train2,i), dtype=tf.float32) for i in range(number_scale_levels)]
                #print(val_list)
                #val = tf.transpose(val,[1,0,2])
                #val2_list = [tf.gather(tf.gather(val_list,i),val.get_shape()[0]-1) for i in range(number_scale_levels)]
                #val = tf.reshape(val,[batch_size*number_of_scales,num_neurons])
                #out_put_val = tf.Print(val_list,[val_list],"The val shape is :",first_n=4096,summarize=40)
                #out_put_val2 = tf.Print(val2_list,[val2_list],"The val2 shape is :",first_n=4096,summarize=40)
                #Weight_W = tf.Variable(tf.truncated_normal([num_neurons,sequence_window]))
                #out_put_Weight_W = tf.Print(Weight_W,[Weight_W],"The Weight_W is :",first_n=1024,summarize=10)
                #b_W = tf.Variable(tf.constant(0.1, shape=[sequence_window,sequence_window]))
                #out_put_b_W = tf.Print(b_W,[b_W.get_shape()],"The b_W shape is :",first_n=1024,summarize=10)
                #u_current_levels_temp = tf.matmul(val2,Weight_W)+b_W
                #out_put_u_current_levels_b_W = tf.Print(b_W,[b_W],"The b_W shape is :",first_n=4096,summarize=40)
                #out_put_u_current_levels_temp = tf.Print(u_current_levels_temp,[u_current_levels_temp],"The u_current_levels_temp  is :",first_n=4096,summarize=40)
                #out_put_u_current_u_w = tf.Print(u_w,[u_w],"The u_w shape is :",first_n=4096,summarize=40)
                #u_current_levels_total = tf.gather(tf.cumsum(tf.exp(batch_vm(u_current_levels_temp,tf.transpose(u_w)))),sequence_window-1)
                #print(tf.transpose(u_w).get_shape())
                #out_put_u_current_levels_total = tf.Print(u_current_levels_total,[u_current_levels_total],"The u_current_levels_total shape is :",first_n=4096,summarize=40)
                #out_put_u_w_scale = tf.Print(u_w_scales_normalized,[u_w_scales_normalized],"The u_w_scales shape is ----------------:",first_n=4096,summarize=40)
                #u_current_levels = tf.div(tf.exp(batch_vm(u_current_levels_temp,tf.transpose(u_w))),u_current_levels_total)
                #out_put_u_current_levels = tf.Print(u_current_levels,[u_current_levels],"The u_current_levels shape is :",first_n=4096,summarize=40)
                #target = tf.placeholder(tf.float32, [batch_size, number_class])
                #print("-----------------------------%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
                #print(val.get_shape())
                #m_total = batch_vm(tf.transpose(u_current_levels),val)
                #u_w_scales_normalized = u_current_levels
                #tf.assign(u_w_scales_normalized,u_current_levels)
                #m_total = tf.mul(tf.transpose(u_current_levels),val)
                #print(m_total.get_shape())
                #out_put_m_total_shape = tf.Print(m_total,[m_total.get_shape()],"The m_total shape is :",first_n=4096,summarize=40)
                #out_put_m_total = tf.Print(m_total,[m_total],"The m_total  is :",first_n=4096,summarize=40)
                #weight = tf.Variable(tf.truncated_normal([num_neurons, int(target.get_shape()[1])]))
                #bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
                #prediction = tf.nn.softmax(tf.matmul(m_total, weight) + bias)
                #out_put_prediction = tf.Print(prediction,[prediction.get_shape()],"The prediction shape is :",first_n=1024,summarize=10)
                #print(prediction.get_shape())

                number_scale_levels = training_level
                u_w_scales_normalized = tf.Variable(tf.constant(1.0/number_scale_levels,shape=[1,number_scale_levels]), name="u_w")
                u_w_scales_normalized = normalized_scale_levels(u_w_scales_normalized)
                u_w = tf.Variable(tf.random_normal(shape=[1,sequence_window]), name="u_w")


                data_original_train = tf.placeholder(tf.float32,[number_scale_levels,batch_size,sequence_window,input_dim])

                output_data_original_train = tf.Print(data_original_train,[data_original_train],"The Original Train  is :",first_n=4096,summarize=40)

                #data_original_train = tf.placeholder(tf.float32,[batch_size,sequence_window,input_dim])
                data_original_train2 = tf.transpose(data_original_train,[1,2,3,0])
                data_original_train_merged = batch_vm2(data_original_train2,tf.transpose(u_w_scales_normalized))
                data_original_train_merged = tf.reshape(data_original_train_merged,(batch_size,sequence_window,input_dim))
                lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_neurons, forget_bias=1.0, activation=tf.nn.tanh)

                val, state = tf.nn.dynamic_rnn(lstm_cell, data_original_train_merged, dtype=tf.float32)
                #val = tf.transpose(val,[1,0,2])
                val2 = tf.gather(val,val.get_shape()[0]-1)
                #val = tf.reshape(val,[batch_size*number_of_scales,num_neurons])
                out_put_val = tf.Print(val,[val.get_shape()],"The val shape is :",first_n=4096,summarize=40)
                out_put_val2 = tf.Print(val2,[val2.get_shape()],"The val2 shape is :",first_n=4096,summarize=40)

                Weight_W = tf.Variable(tf.truncated_normal([num_neurons,sequence_window]))
                out_put_Weight_W = tf.Print(Weight_W,[Weight_W],"The Weight_W is :",first_n=1024,summarize=10)

                b_W = tf.Variable(tf.constant(0.1, shape=[sequence_window,sequence_window]))
                out_put_b_W = tf.Print(b_W,[b_W.get_shape()],"The b_W shape is :",first_n=1024,summarize=10)

                #tf.reshape(tf.matmul(tf.reshape(Aijk,[i*j,k]),Bkl),[i,j,l])

                #u_current_levels_temp = tf.reshape(tf.mul(tf.reshape(val,[batch_size*num_neurons],Weight_W)+b_W
                #print("val shape is ")
                #print(val2.get_shape())
                #print(Weight_W.get_shape())
                #print(b_W.get_shape())
                u_current_levels_temp = tf.matmul(val2,Weight_W)+b_W

                out_put_u_current_levels_b_W = tf.Print(b_W,[b_W],"The b_W shape is :",first_n=4096,summarize=40)
                out_put_u_current_levels_temp = tf.Print(u_current_levels_temp,[u_current_levels_temp],"The u_current_levels_temp  is :",first_n=4096,summarize=40)
                out_put_u_current_u_w = tf.Print(u_w,[u_w],"The u_w shape is :",first_n=4096,summarize=40)

                u_current_levels_total = tf.gather(tf.cumsum(tf.exp(batch_vm(u_current_levels_temp,tf.transpose(u_w)))),sequence_window-1)
                #print(tf.transpose(u_w).get_shape())
                out_put_u_current_levels_total = tf.Print(u_current_levels_total,[u_current_levels_total],"The u_current_levels_total shape is :",first_n=4096,summarize=40)
                out_put_u_w_scale = tf.Print(u_w_scales_normalized,[u_w_scales_normalized],"The u_w_scales shape is ----------------:",first_n=4096,summarize=40)

                u_current_levels = tf.div(tf.exp(batch_vm(u_current_levels_temp,tf.transpose(u_w))),u_current_levels_total)
                out_put_u_current_levels = tf.Print(u_current_levels,[u_current_levels],"The u_current_levels shape is :",first_n=4096,summarize=40)

                target = tf.placeholder(tf.float32, [batch_size, number_class])
                #print("-----------------------------%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
                #print(val.get_shape())


                m_total = batch_vm(tf.transpose(u_current_levels),val)

                #u_w_scales_normalized = u_current_levels
                #tf.assign(u_w_scales_normalized,u_current_levels)
                #m_total = tf.mul(tf.transpose(u_current_levels),val)

                #print(m_total.get_shape())

                out_put_m_total_shape = tf.Print(m_total,[m_total.get_shape()],"The m_total shape is :",first_n=4096,summarize=40)
                out_put_m_total = tf.Print(m_total,[m_total],"The m_total  is :",first_n=4096,summarize=40)

                weight = tf.Variable(tf.truncated_normal([num_neurons, int(target.get_shape()[1])]))
                bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
                prediction = tf.nn.softmax(tf.matmul(m_total, weight) + bias)

                out_put_prediction = tf.Print(prediction,[prediction.get_shape()],"The prediction shape is :",first_n=1024,summarize=10)
                #print(prediction.get_shape())

            else:
                try:
                    number_scale_levels = training_level
                    u_w_scales_normalized = tf.Variable(tf.constant(1.0/number_scale_levels,shape=[1,number_scale_levels]), name="u_w")
                    u_w_scales_normalized = normalized_scale_levels(u_w_scales_normalized)
                    u_w = tf.Variable(tf.random_normal(shape=[1,sequence_window]), name="u_w")


                    data_original_train = tf.placeholder(tf.float32,[number_scale_levels,batch_size,sequence_window,input_dim])

                    output_data_original_train = tf.Print(data_original_train,[data_original_train],"The Original Train  is :",first_n=4096,summarize=40)

                    #data_original_train = tf.placeholder(tf.float32,[batch_size,sequence_window,input_dim])
                    data_original_train2 = tf.transpose(data_original_train,[1,2,3,0])
                    data_original_train_merged = batch_vm2(data_original_train2,tf.transpose(u_w_scales_normalized))
                    data_original_train_merged = tf.reshape(data_original_train_merged,(batch_size,sequence_window,input_dim))
                    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_neurons, forget_bias=1.0, activation=tf.nn.tanh)

                    val, state = tf.nn.dynamic_rnn(lstm_cell, data_original_train_merged, dtype=tf.float32)

                    target = tf.placeholder(tf.float32, [batch_size, number_class])






                except:
                    data_original_train = tf.placeholder(tf.float32, [None,sequence_window,input_dim])
                    target = tf.placeholder(tf.float32, [None, number_class])
                    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_neurons, forget_bias=1.0, activation=tf.nn.tanh)
                    val, state = tf.nn.dynamic_rnn(lstm_cell, data_original_train, dtype=tf.float32)


                val = tf.transpose(val, [1, 0, 2])
                last = tf.gather(val, int(val.get_shape()[0]) - 1)

                weight = tf.Variable(tf.truncated_normal([num_neurons, int(target.get_shape()[1])]))
                bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))

                prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)


            #cost_cross_entropy = -tf.reduce_mean(target * tf.log(prediction))
            cost_cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(prediction, target, name=None))  # Sigmoid

            #optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
            minimize = optimizer.minimize(cost_cross_entropy)

            #mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
            #error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
            correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(target, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

            init_op = tf.initialize_all_variables()
            sess = tf.Session()

            sess.run(init_op)


            no_of_batches = int(len(y_train) / batch_size)
            epoch_training_loss_list = []
            epoch_training_acc_list = []
            epoch_val_loss_list = []
            epoch_val_acc_list = []
            weight_list=[]
            early_stopping = 100
            epoch_stop = epoch
            for i in range(epoch):
                if early_stopping > 0:
                    pass
                else:
                    epoch_stop = i+1
                    break
                ptr = 0
                for j in range(no_of_batches):
                    inp, out = x_train[:,ptr:ptr + batch_size], y_train[ptr:ptr + batch_size]
                    inp2, out2 = x_test[:,ptr:ptr + batch_size], y_test[ptr:ptr + batch_size]
                    #print("INPUT IS ")
                    #print(inp)
                    #print("OUTPUT IS ")
                    #print(inp2)
                    #da.Plotting_Sequence(inp,out)
                    try:
                        #pass

                        sess.run(out_put_u_w_scale, {data_original_train: inp, target: out})
                        #sess.run(output__1,{data_original_train: inp, target: out})
                        #sess.run(output0,{data_original_train: inp, target: out})
                        #sess.run(output1,{data_original_train: inp, target: out})

                        #print("11111")
                        #print(out_put_u_w_scale)
                        #print("22222")
                        #print(normalized_scale_levels(out_put_u_w_scale))
                        #print(normalized_scale_levels(out_put_u_w_scale).shape)
                        #sess.run(tf.assign(u_w_scales,normalized_scale_levels(out_put_u_w_scale)))

                        #sess.run(out_put_original_train, {data_original_train: inp, target: out})
                        sess.run(out_put_val, {data_original_train: inp, target: out})
                        sess.run(out_put_val2, {data_original_train: inp, target: out})
                        #sess.run(out_put_Weight_W, {data_original_train: inp, target: out})
                        #sess.run(out_put_u_current_levels_temp, {data_original_train: inp, target: out})
                        #sess.run(out_put_u_current_u_w, {data_original_train: inp, target: out})
                        #sess.run(out_put_u_current_levels_b_W, {data_original_train: inp, target: out})

                        #sess.run(out_put_u_current_levels_total, {data_original_train: inp, target: out})
                        weight_list.append(sess.run(u_current_levels, {data_original_train: inp, target: out}))
                        #sess.run(out_put_m_total, {data_original_train: inp, target: out})
                        #sess.run(out_put_m_total_shape, {data_original_train: inp, target: out})

                        #sess.run(out_put_prediction, {data_original_train: inp, target: out})
                    except:
                        pass
                    #print(out)
                    ptr += batch_size
                    print(inp.shape)
                    sess.run(minimize, {data_original_train: inp,target: out})
                    training_acc,training_loss = sess.run((accuracy,cost_cross_entropy),{data_original_train: inp, target: out})
                        #sess.run(out_put_before_multi_first_level,{data_original_train: inp, target: out})
                        #sess.run(output_data_for_lstm_multi_scale,{data_original_train: inp, target: out})

                    epoch_training_loss_list.append(training_loss)
                    epoch_training_acc_list.append(training_acc)
                        #sess.run(out_put_before_multi_first_level,{data_original_train: inp, target: out})
                        #sess.run(out_put_before_multi_second_level,{data_original_train: inp, target: out})
                        #sess.run(out_put_before_multi_third_level,{data_original_train: inp, target: out})
                        #sess.run(out_put_after_multi_level,{data_original_train: inp, target: out})

                    #sess.run(minimize, {data_original_train: inp2,target: out2})

                    val_acc,val_loss = sess.run((accuracy,cost_cross_entropy),{data_original_train: inp2, target: out2})
                    epoch_val_loss_list.append(val_loss)
                    epoch_val_acc_list.append(val_acc)
                print("Epoch %s"%(str(i+1))+">"*20+"="+"train_accuracy: %s, train_loss: %s"%(str(training_acc),str(training_loss))\
                      +",\tval_accuracy: %s, val_loss: %s"%(str(val_acc),str(val_loss)))
                try:
                    max_val_acc = epoch_val_acc_list[-2]
                except:
                    max_val_acc = 0

                if epoch_val_acc_list[-1] < max_val_acc:
                    early_stopping -= 1
                elif epoch_val_acc_list[-1] >= max_val_acc:
                    early_stopping = 100
            #incorrect = sess.run(error, {data: x_test, target: y_test})
            #print("x_test shape is ..."+str(x_test.shape))
            #print(x_test)
            try:
                result = sess.run(prediction, {data_original_train: x_test, target: y_test})
            except:
                x_test = x_test[0:batch_size]
                y_test = y_test[0:batch_size]
                result = sess.run(prediction, {data_original_train: x_test, target: y_test})

            #print(result)
            #print("shape is ("+str(len(result))+","+str(len(result[0]))+')')
            #print('Epoch {:2d} error {:3.1f}%'.format(i + 1, 100 * incorrect))
            #if training_level > 0:
                #scale_weight = sess.run(scale_weight, {data_original_train: x_test, target: y_test})
                #print("The final scale weight is :\n")
                #print(scale_weight)
            #save_path = saver.save(sess, os.path.join(os.getcwd(),"modelckpt.txt"))
            #aaa = saver.restore(sess, os.path.join(os.getcwd(),"modelckpt.txt"))
            #all_variables = tf.trainable_variables()
            #var = [v for v in tf.trainable_variables() if v.name == "scale_weight"]
            sess.close()

        elif Label == "MS-LSTMB":
            pass

        results = Evaluation.Evaluation(y_test, result)#Computing ACCURACY, F1-Score, .., etc

        try:
            for each_eval, each_result in results.items():
                result_list_dict[each_eval].append(each_result)
            if len(Parameters) > 0:
                label = "PW"
            else:
                label = "DA"
        except:
            label = "AUC"

        #if len(Parameters) > 0:
            #try:
                #for each_eval, each_result in results.items():
                    #result_list_dict[each_eval].append(each_result)
                #label = "PW"
                #with open(os.path.join(os.getcwd(), "TensorFlow_Log" + filename + ".txt"), "a")as fout:
                  #  if training_level > 0:
                   #     outfileline = Label + "_____epoch:" + str(epoch) + ",_____learning rate:" + str(learning_rate) + ",_____multi_scale:" + str(is_multi_scale) + "\n"
                    #else:
                     #   outfileline = Label + "_____epoch:" + str(epoch) + ",_____learning rate:" + str(learning_rate) + ",_____multi_scale:" + str(is_multi_scale) + ",_____train_set_using_level:" + str(training_level) + "\n"

                    #fout.write(outfileline)
                    #for eachk, eachv in result_list_dict.items():
                     #   fout.write(eachk + ": " + str(round(eachv, 3)) + ",\t")
                    #fout.write('\n')

                #return results
            #except:
                #label = "AUC"
                #return Evaluation.Evaluation(y_test, result)#Plotting AUC
        #else:
            #for each_eval, each_result in results.items():
                #result_list_dict[each_eval].append(each_result)
            #label = "da"


        #if label == "AUC": return results
        if label == "DA":
            pass
            """
            y_test2 = np.array(Evaluation.ReverseEncoder(y_test))
            result2 = np.array(Evaluation.ReverseEncoder(result))
            with open("StatFalseAlarm_"+filename+"_True.txt","w") as fout:
                for tab in range(len(y_test2)):
                    fout.write(str(int(y_test2[tab]))+'\n')
            with open("StatFalseAlarm_"+filename+"_"+Label+"_"+"_Predict.txt","w") as fout:
                for tab in range(len(result2)):
                    fout.write(str(int(result2[tab]))+'\n')
            """
    try:
        for eachk, eachv in result_list_dict.items():
            result_list_dict[eachk] = np.average(eachv)
        print(result_list_dict)
        if not is_add_noise:
            if corss_val_label == 0:
                outputfilename = "Tab_A_MS-LSTM_Log_" + filename + ".txt"
            else:
                outputfilename = "Tab_B_MS-LSTM_Log_" + filename + ".txt"
            with open(os.path.join(os.getcwd(), outputfilename), "a") as fout:
                if training_level>0:
                    outfileline = Label+"_epoch:"+str(epoch_stop)+",__wavelet type:"+str(wave_type)+",__pooling type:"+str(pooling_type)+",__learning rate:"+str(learning_rate)+",__multi_scale:"+str(is_multi_scale)+",__scale_levels:"+str(training_level)+",__sequence_window:"+str(sequence_window)+"\n"
                else:
                    outfileline = Label+"_epoch:"+str(epoch_stop)+",__wavelet type:"+str(wave_type)+",__learning rate:"+str(learning_rate)+",__multi_scale:"+str(is_multi_scale)+",__scale_levels:"+str(training_level)+",__sequence_window:"+str(sequence_window)+"\n"

                fout.write(outfileline)
                for eachk,eachv in result_list_dict.items():
                    fout.write(eachk+": "+str(round(eachv,3))+",\t")
                fout.write('\n')
        else:
            with open(os.path.join(os.getcwd(), "MS-LSTM_Log_Adding_Noise_" + filename + ".txt"), "a")as fout:
                if training_level > 0:
                    outfileline = Label + "_____epoch:" + str(epoch_stop) +",_____pooling type:"+str(pooling_type)+ ",_____learning rate:" + \
                        str(learning_rate) + ",_____multi_scale:" + str(is_multi_scale) + "\n"
                else:
                    outfileline = Label + "_____epoch:" + str(epoch_stop) + ",_____pooling type:"+str(pooling_type)+ ",_____learning rate:" + \
                        str(learning_rate) + ",_____multi_scale:" + str(is_multi_scale) + ",_____train_set_using_level:" + str(training_level) + "\n"

                fout.write(outfileline)
                for eachk, eachv in result_list_dict.items():
                    fout.write(eachk + ": " + str(round(eachv, 3)) + ",\t")
                fout.write('\n')
    except:
        pass
    #print("lallala")
    #print(epoch_training_loss_list)
    if not "DA"==label: return results
    return epoch_training_loss_list,epoch_val_loss_list,epoch_training_acc_list,epoch_val_acc_list,weight_list,results
Exemplo n.º 41
0
for img in Test:

    A, B, C = TD.get_indeces(img)
    B1 = B.reshape(np.prod(B.shape))
    batch = 1000
    num_batches = A.shape[0] / batch
    Sha = B.shape
    preds = np.zeros(shape = (A.shape[0], NC ))
    for i in range(num_batches):
        idx = range(i*batch, (i+1)*batch)
        K = A[idx]
        M, N , O= TD.Patch_gen(K,PS, C)
        preds[idx] = f_eval(M, N, O)
         
    if num_batches*batch < A.shape[0]:
        tot = num_batches*batch
        K = A[tot:] 
        M, N, O = TD.Patch_gen(K, PS, C)
        preds[tot:A.shape[0]] = f_eval(M, N, O)
        
    P = np.argmax(preds, axis = -1)
    MM = np.ravel_multi_index(A.T, np.asarray(B.shape))
    Final_pred = np.zeros(B1.shape)
    Final_pred[MM] = P
    Lab = B1.reshape(Sha) 
    Segs = Final_pred.reshape(Sha)
    Dice = np.append(Dice,  [E.Dice_score(Segs, Lab, 1)])
    print Dice 
    io.savemat("/home/xvt131/Biomediq/Results/valiBrain/%s" %(img[45:60]), mdict= {"Seg":Segs,"Lab":Lab} )
Exemplo n.º 42
0
def test_confusion_matrix(y_pred):
    y_true = [0, 1]
    expect = confusion_matrix(y_true, y_pred).flatten().tolist()
    assert ev.confusion_matrix(y_true, y_pred) == expect
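
The metric tests above (test_accuracy, test_precision, test_f1, test_confusion_matrix, test_confusion_matrix_multi) all receive y_pred as an argument, but the fixture or parametrization that supplies it is not shown. Assuming pytest, one plausible way to drive them is a parametrize decorator, sketched here for the accuracy test; the import style for the ev module is also an assumption.

import pytest
from sklearn.metrics import accuracy_score

import Evaluation as ev  # assumed: whatever module exposes accuracy/precision/f1/confusion_matrix

@pytest.mark.parametrize("y_pred", [[0, 0], [0, 1], [1, 0], [1, 1]])
def test_accuracy(y_pred):
    y_true = [0, 1]
    expect = accuracy_score(y_true, y_pred)
    # the custom metric should agree with scikit-learn's reference implementation
    assert ev.accuracy(y_true, y_pred) == expect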