Example #1
File: p3.py Project: ealiasannila/iml
def doKNN(k):
	dm = cdist(teXf, trXf,'euclidean')
	cfm = np.zeros((10,10), dtype = int)
	for a in range(0, len(dm)):
		knn = np.argpartition(dm[a], k)[:k]
		preds = trY[knn]
		counts = np.bincount(preds)
		pred = -1
		if len(counts)>=2:
			top2 = np.argpartition(-counts, 1)	
			if counts[top2[0]] == counts[top2[1]]:
				d = 99999
				for i in xrange(0, len(knn)):
					val = dm[a][knn[i]]  # distance to the i-th nearest neighbour, not to training column i
					if val < d:
						d = val
						pred = trY[knn[i]]
			else:		
				pred = top2[0]
		else:
			pred = 0
		#print pred
		#mnist.visualize(teX[a])
		cfm[teY[a]][pred] += 1
	#print cfm
	#print "ER: ", 1 - np.sum(np.diagonal(cfm))/np.sum(cfm)
	
	return cfm
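The core cdist + argpartition + bincount pattern used above can be exercised on synthetic data; the sketch below uses made-up trXf/trY/teXf stand-ins (not the project's MNIST arrays) and omits the tie-breaking logic.

import numpy as np
from scipy.spatial.distance import cdist

rng = np.random.default_rng(0)
trXf = rng.normal(size=(100, 4))        # stand-in training features
trY = rng.integers(0, 3, size=100)      # stand-in training labels
teXf = rng.normal(size=(5, 4))          # stand-in test features

k = 5
dm = cdist(teXf, trXf, 'euclidean')              # test-to-train distances
knn = np.argpartition(dm, k, axis=1)[:, :k]      # k nearest indices per row (unsorted)
preds = np.array([np.bincount(trY[row]).argmax() for row in knn])  # majority vote
print(preds)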
Example #2
 def similarity_matrix(self):
     """ Calculate the similarity matrix given all samples used for GTM map training
     :return: similarity_matrix: Matrix assessing the similarity between samples used for GTM map training
     """
     print "Calculating similarity matrix..."
     # Find one tenth of the highest and lowest probability distribution values for each sample in the latent space
     sim_size = int(round(self.latent_space_size/10))
     responsibility_indexes = np.zeros((sim_size * 2, self.input_data.shape[0]))
     corr_input = np.zeros((sim_size * 2, self.input_data.shape[0]))
     for i in xrange(0, self.input_data.shape[0]):
         responsibility_indexes[0:sim_size, i] = np.argpartition(self.gtm_responsibility[:, i],
                                                                 -sim_size)[-sim_size:]
         responsibility_indexes[sim_size:, i] = np.argpartition(self.gtm_responsibility[:, i], sim_size)[0:sim_size]
     responsibility_indexes = responsibility_indexes.astype(int)
     # Create correlation input matrix for similarity assessment
     for i in xrange(0, self.input_data.shape[0]):
         corr_input[:, i] = self.gtm_responsibility[responsibility_indexes[:, i], i]
     # Calculate correlation between all samples and build similarity matrix
     similarity_matrix = np.corrcoef(np.transpose(corr_input))
     # Plot heat map of the similarity matrix accordingly
     [x, y] = np.meshgrid(np.linspace(1, self.input_data.shape[0], self.input_data.shape[0]),
                          np.linspace(1, self.input_data.shape[0], self.input_data.shape[0]))
     x = np.ravel(x)
     y = np.ravel(y)
     sim_lat = np.array([x, y])
     print "Plotting color mesh image..."
     plt.pcolormesh(np.reshape(sim_lat[0, :], (self.input_data.shape[0], self.input_data.shape[0])),
                np.reshape(sim_lat[1, :], (self.input_data.shape[0], self.input_data.shape[0])), similarity_matrix,
                cmap='magma', vmin=0, vmax=1)
     plt.colorbar()
     plt.axis([x.min(), x.max(), y.min(), y.max()])
     plt.gca().invert_yaxis()
     return similarity_matrix
def predict_variance_inf_phase1(budget, hum_train_means, temp_train_means, hum_train_vars, temp_train_vars):
    """Method to make predictions based on max-variance active inference."""         
    start_hum = 0
    window_hum = None
    window_temp = None
    i = 0

    hum_preds = np.ones((50, 96))
    temp_preds = np.ones((50, 96))

    for t in global_times:
        if budget > 0:
            window_hum = np.argpartition(hum_train_vars[t], -budget)[-budget:]
            window_temp = np.argpartition(temp_train_vars[t], -budget)[-budget:]
        else:
            window_hum = np.array([])
            window_temp = np.array([])

        hum_pred, temp_pred = makePreds_phase1(window_hum, window_temp, hum_train_means, temp_train_means, i, t)

        hum_preds[:, i] = copy.deepcopy(hum_pred)
        temp_preds[:, i] = copy.deepcopy(temp_pred)
        
        i += 1

    hum_mean_err = mean_absolute_error(hum_test, hum_preds)
    temp_mean_err = mean_absolute_error(temp_test, temp_preds)

    return hum_preds, temp_preds, hum_mean_err, temp_mean_err
		def precision_test_function(theano_inputs):
			k = 10
			scores1, scores2, c_select, n_used_items = theano_test_function(*theano_inputs)
			ids1 = np.argpartition(-scores1, range(k), axis=-1)[0, :k]
			ids2 = np.argpartition(-scores2, range(k), axis=-1)[0, :k]
			
			return ids1, ids2, c_select, n_used_items
    def branch_to_nodes(self, wt, completion):
        """
        Decide which nodes to branch to next
        """
        missing_edges = HGT.get_missing_edges(completion) # Obtain the missing edge sparse list

        nb = self.strat.node_brancher
        
        # Determine if there is a maximum count
        count_max = min(self.strat.max_node_branch, self.num_nodes)
        
        if nb is None or not 'name' in nb: # Default
            # Gets nodes that contribute to missing edge
            edge = missing_edges.indices[0] # Grab any next edge
            node_indices = self.H[:, edge].indices
        elif nb['name'] == 'greedy' or nb['name'] == 'long':
            # Gets the nodes that overlap the most(least) with what's missing
            overlap = self.H.dot(missing_edges.T)
            # k = min(count_max + wt.nnz, overlap.nnz)
            k = min(count_max, overlap.nnz)
            if k >= self.num_nodes or k == overlap.nnz:
                if nb['name'] == 'greedy':
                    alg_slice = np.argsort(overlap.data)[::-1]
                else: # long
                    alg_slice = np.argsort(overlap.data)
            else: # Else be smart, don't perform O(nlogn) operations, perform O(k) operations
                if nb['name'] == 'greedy':
                    alg_slice = np.argpartition(overlap.data, -k)[-k:]
                else: #long
                    alg_slice = np.argpartition(overlap.data, k)[:k]
            node_indices = overlap.indices[alg_slice]
        elif nb['name'] == 'random':
            # Gets nodes that contribute to random missing edge
            edge = np.random.choice(missing_edges.indices) # Grab any next edge
            node_indices = self.H[:, edge].indices
        elif nb['name'] == 'diverse':
            # Diversify the kinds of transversals that have been found
            if wt.nnz == 0: # Just starting out
                node_indices = np.arange(self.num_nodes) # Branch to everything
            else: # Otherwise be greedy up to one
                # edge = missing_edges.indices[0] # Grab any next edge
                # node_indices = [self.H[:, edge].indices[0]]
                overlap = self.H.dot(missing_edges.T)  # needed below for scaled_overlap (was commented out)
                # node_indices = [overlap.indices[np.argmax(overlap.data)]]
                scaled_overlap = overlap.data / (self.node_weights[overlap.indices]**2)
                node_indices = overlap.indices[np.where(np.max(scaled_overlap) == scaled_overlap)]
        else:
            raise ValueError("Invalid strat.node_brancher: {0}".format(self.strat.node_brancher))
        
        if nb is not None and bool(nb.get('shuffle', False)):
            np.random.shuffle(node_indices)
        
        count = 0
        for i in node_indices:
            if count >= count_max:
                break
            if not wt[i, 0] > 0: # not already part of working transversal
                self.log('Branching to node:', i)
                count += 1
                yield i
Example #6
def local_kmeans_class(I, L, x, k):
    from scipy.spatial.distance import cdist

    sizex = len(np.atleast_2d(x))
    label = np.zeros((sizex,k))
    for rowsx in range(0, sizex):
        tic()
        dists = cdist(I, np.atleast_2d(x[rowsx]), metric='euclidean')
        toc()
        center = np.zeros((10,k,28*28))
        label_order = np.unique(L)
        l=0
        tic()
        thing = np.zeros((k,28*28))
        for labs in np.unique(L):
            indices = L == labs
            k_smallest = np.argpartition(dists[indices],tuple(range(1,k)),axis=None)
            for i in range(0,k):
                M = I[indices]
                #center[l,i,:] = np.average(M[k_smallest[:i+1]],axis = 0)
                if i == 0:
                    thing[i] = M[k_smallest[i+1]]
                else:
                    thing[i] = thing[i-1] + M[k_smallest[i+1]]
            center[l,:,:] = np.divide(thing,np.repeat(np.arange(1,11).reshape(10,1),28*28,axis=1))
            l+=1
        toc()
        for i in range(k):
            #print(cdist(center[:,i,:], np.atleast_2d(x[rowsx]), metric='euclidean'))
            dists2center = cdist(center[:,i,:], np.atleast_2d(x[rowsx]), metric='euclidean')
            k_smallest = np.argpartition(dists2center,tuple(range(1)),axis=None)
            label[rowsx,i] = label_order[k_smallest[0]]
    return label
Example #7
 def construct_initial_solution(self):
   ind = np.argpartition(self.collaboration_coo.data, -len(self.villains_team))[-len(self.villains_team):]
   inc = 1
   while len(np.unique(self.collaboration_coo.row[ind])) < len(self.villains_team):
     ind = np.argpartition(self.collaboration_coo.data, -(len(self.villains_team) + inc))[-(len(self.villains_team) + inc):]
     inc += 1
   heroes_team = self.heroes.loc[self.heroes[CHARACTER_ID].isin(self.collaboration_coo.row[ind])]
   return heroes_team
def similarityPlot():
	import matplotlib.pyplot as plt
	from matplotlib import rcParams
	tfidf_vectorizer = TfidfVectorizer(min_df=1)
	names = friendsAboveMinNumMessages(200) + [me]
	data = []
	words = [] #ordering of words in tf_idf matrix
	wordsSet = set() #for faster lookup
	nameSet = set()
	for person in personDict:
		for name in person.split():
			nameSet.add(name)
			nameSet.add(name.lower())
	for i in range(len(names)):
		data.append(getAllMessagesAsString(names[i], False))
	tfidf_matrix = tfidf_vectorizer.fit_transform(data)
	featureNames = tfidf_vectorizer.get_feature_names()
	tfidf_arr = tfidf_matrix.toarray()
	for j in range(len(tfidf_arr[0])):
		word = featureNames[j]  # the j-th feature (word), not its tf-idf value
		if word not in wordsSet:
			words.append(word)
			wordsSet.add(word)
	#nmds = manifold.MDS(metric = True, n_components = N_DISTINGUISHING_FEATURES) 
	#npos = nmds.fit_transform(tfidf_matrix.toarray())
	clf = PCA(n_components=2)
	npos = clf.fit_transform(tfidf_arr)
	plt.scatter(npos[:, 0], npos[:, 1], marker = 'o', c = 'b', cmap = plt.get_cmap('Spectral')) #change colors
	for name, x, y in zip(names, npos[:, 0], npos[:, 1]):
		plt.annotate(
			name, 
			xy = (x, y), xytext = (-20, 20),
			textcoords = 'offset points', ha = 'right', va = 'bottom',
			bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
			arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))
	fig, ax = plt.subplots()
	ax2 = ax.twinx()
	xAxisP = [featureNames[i] for i in np.argpartition(clf.components_[0], -50)[-50:] if featureNames[i] not in nameSet]
	yAxisP = [featureNames[i] for i in np.argpartition(clf.components_[1], -50)[-50:] if featureNames[i] not in nameSet]
	xAxisN = [featureNames[i] for i in np.argpartition(-clf.components_[0], -50)[-50:] if featureNames[i] not in nameSet]
	yAxisN = [featureNames[i] for i in np.argpartition(-clf.components_[1], -50)[-50:] if featureNames[i] not in nameSet]
	ax.set_xlabel("Most Postively influential words along x axis:\n" + ", ".join(xAxisP), fontsize=18)
	ax.set_ylabel("Most Postively influential words along y axis:\n" + ", ".join(yAxisP), fontsize=18)
	ax2.set_xlabel("Most Negatively influential words along x axis:\n" + ", ".join(xAxisN), fontsize=18)
	ax2.set_ylabel("Most Negatively influential words along y axis:\n" + ", ".join(yAxisN), fontsize=18)
	# xAxis = [featureNames[i] for i in np.argpartition(np.absolute(clf.components_[0]), -50)[-50:] if featureNames[i] not in nameSet]
	# yAxis = [featureNames[i] for i in np.argpartition(np.absolute(clf.components_[1]), -50)[-50:] if featureNames[i] not in nameSet]
	# for i in range(1, max(len(xAxis), len(yAxis)) ):
	# 	if i % 20 == 0 and i < len(xAxis):
	# 		xAxis[i] += "\n"
	# 	if i % 15 == 0 and i < len(yAxis):
	# 		yAxis[i] += "\n"
	# plt.xlabel("Most influential words along x axis:\n" + ", ".join(xAxis), fontsize=18)
	# plt.ylabel("Most influential words along y axis:\n" + ", ".join(yAxis), fontsize=18)
	rcParams.update({'figure.autolayout': True})
	plt.suptitle("Word-Usage Similarity Scatterplot", fontsize = 24, fontweight = 'bold')
	plt.show()
Example #9
 def _get_k_max_elements_indices_and_scores(vec, k, mask=None):
     if mask is None:
         # We use argpartition here instead of argsort to achieve linear-time performance.
         max_elements_indices = np.argpartition(-vec, k - 1)[:k]
     else:
         masked_vec = vec.copy()  # To avoid side-effects
         masked_vec[~mask] = -np.inf
         max_elements_indices = np.argpartition(-masked_vec, k - 1)[:k]
     return max_elements_indices, vec[max_elements_indices]
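A minimal sketch of the masked top-k idiom from the helper above, on a toy vector (assumes numpy imported as np; not part of the original project):

vec = np.array([0.3, 0.9, 0.1, 0.7, 0.5])
mask = np.array([True, False, True, True, True])   # exclude index 1 from consideration
k = 2
masked_vec = vec.copy()
masked_vec[~mask] = -np.inf
top_k = np.argpartition(-masked_vec, k - 1)[:k]
print(top_k, vec[top_k])   # indices 3 and 4 (in some order) with their scores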
Example #10
	def _phase2(self):
		"""
		Execute phase 2 of the SP region. This phase is used to compute the
		active columns.
		
		Note - This should only be called after phase 1 has been called and
		after the inhibition radius and neighborhood have been updated.
		"""
		
		# Shift the outputs
		self.y[:, 1:] = self.y[:, :-1]
		self.y[:, 0] = 0
		
		# Calculate k
		#   - For a column to be active its overlap must be at least as large
		#     as the overlap of the k-th largest column in its neighborhood.
		k = self._get_num_cols()
		
		if self.global_inhibition:
			# The neighborhood is all columns, thus the set of active columns
			# is simply columns that have an overlap >= the k-th largest in the
			# entire region
			
			# Compute the winning column indexes
			if self.learn:				
				# Randomly break ties
				ix = np.argpartition(-self.overlap[:, 0] -
					self.prng.uniform(.1, .2, self.ncolumns), k - 1)[:k]
			else:
				# Choose the same set of columns each time
				ix = np.argpartition(-self.overlap[:, 0], k - 1)[:k]
			
			# Set the active columns
			self.y[ix, 0] = self.overlap[ix, 0] > 0
		else:
			# The neighborhood is bounded by the inhibition radius, therefore
			# each column's neighborhood must be considered
			
			for i in xrange(self.ncolumns):
				# Get the neighbors
				ix = np.where(self.neighbors[i])[0]
				
				# Compute the minimum top overlap
				if ix.shape[0] <= k:
					# Desired number of candidates is at or below the desired
					# activity level, so find the overall min
					m = max(bn.nanmin(self.overlap[ix, 0]), 1)
				else:
					# Desired number of candidates is above the desired
					# activity level, so find the k-th largest
					m = max(-np.partition(-self.overlap[ix, 0], k - 1)[k - 1],
						1)
				
				# Set the column activity
				if self.overlap[i, 0] >= m: self.y[i, 0] = True
Example #11
File: mrpt.py Project: Kitware/SMQTK
        def _build_recursive(indices, level=0, split_index=0):
            """
            Descend recursively into tree to build it, setting splits and
            returning indices for leaves

            :param indices: The current set of indices before partitioning
            :param level: The level in the tree
            :param split_index: The index of the split to set

            :return: A list of arrays representing leaf membership
            :rtype: list[np.ndarray]
            """
            # If we're at the bottom, no split, just return the set
            if level == self._depth:
                return [indices]

            n = indices.size
            # If we literally don't have enough to populate the leaf, make it
            # empty
            if n < 1:
                return []

            # Get the random projections for these indices at this level
            # NB: Recall that the projection matrix has shape (levels, N)
            level_proj = proj[indices, level]

            # Split at the median if even, put median in upper half if not
            n_split = n // 2
            if n % 2 == 0:
                part_indices = np.argpartition(
                    level_proj, (n_split - 1, n_split))
                split_val = level_proj[part_indices[n_split - 1]]
                split_val += level_proj[part_indices[n_split]]
                split_val /= 2.0
            else:
                part_indices = np.argpartition(level_proj, n_split)
                split_val = level_proj[part_indices[n_split]]

            splits[split_index] = split_val

            # part_indices is relative to this block of values, recover
            # main indices
            left_indices = indices[part_indices[:n_split]]
            right_indices = indices[part_indices[n_split:]]

            # Descend into each split and get sub-splits
            left_out = _build_recursive(left_indices, level=level + 1,
                                        split_index=2 * split_index + 1)
            right_out = _build_recursive(right_indices, level=level + 1,
                                         split_index=2 * split_index + 2)

            # Assemble index set
            left_out.extend(right_out)
            return left_out
Example #12
def fitOneLinearRegression(thetaLinear, IntensityLinear, tiltanglesArray, options):
	if (len(tiltanglesArray)%2 == 1):
		halfN = int(len(tiltanglesArray)/2) + 1
		xLeft, yLeft = thetaLinear[0:halfN], IntensityLinear[0:halfN]
		xRight, yRight = thetaLinear[halfN-1:], IntensityLinear[halfN-1:]
	else:
		halfN = int(len(tiltanglesArray)/2)
		xLeft, yLeft = thetaLinear[0:halfN], IntensityLinear[0:halfN]
		xRight, yRight = thetaLinear[halfN:], IntensityLinear[halfN:]

	slopeLeft, interceptLeft, r2Left = linearRegression(xLeft, yLeft)
	slopeRight, interceptRight, r2Right = linearRegression(xRight, yRight)

	assert(len(xLeft)==len(xRight))

	fitLeft = slopeLeft*xLeft + interceptLeft
	fitRight = slopeRight*xRight + interceptRight

	#the sum of squared residuals
	resLeft = yLeft - fitLeft
	resLeft = resLeft / fitLeft
	#print "resLeft", resLeft
	resRight = yRight - fitRight
	resRight = resRight / fitRight
	#print "resRight", resRight

	fresLeft = sum(resLeft**2)
	fresRight = sum(resRight**2)
	fres = [fresLeft*1000000, fresRight*1000000]

	#find the points with the largest 3 residuals in left and right branches, use numpy.argpartition
	#N = options.largestNRes
	N = 3
	negN = (-1)*N
	indexLargeLeft = np.argpartition(resLeft**2, negN)[negN:]
	indexLargeRight = np.argpartition(resRight**2, negN)[negN:]

	M = 3
	#M = options.smallestNRes
	posM = M
	indexSmallLeft = np.argpartition(resLeft**2, posM)[:posM]
	indexSmallRight = np.argpartition(resRight**2, posM)[:posM]

	#MSE, under the assumption that the population error term has a constant variance, the estimate of that variance is given by MSE, mean square error
	#The denominator is the sample size reduced by the number of model parameters estimated from the same data, (n-p) for p regressors or (n-p-1) if an intercept is used.
	#In this case, p=1 so the denominator is n-2.
	stdResLeft = np.std(resLeft, ddof=2)
	stdResRight = np.std(resRight, ddof=2)
	stdRes = [stdResLeft*1000, stdResRight*1000]
	ret = fres, stdRes, xLeft, yLeft, fitLeft, xRight, yRight, fitRight, indexLargeLeft, indexLargeRight, indexSmallLeft, indexSmallRight, resLeft, resRight, slopeLeft, interceptLeft, slopeRight, interceptRight
	return ret
Example #13
    def define_toplogy(self, num_input, num_hidden,  num_output, density):
        """
        Defines the topology of the OpenBrain network.
        :param num_input:
        :param num_hidden:
        :param num_output:
        :param density:
        :return:
        """
        topo = networkx.DiGraph(networkx.watts_strogatz_graph(self.num_neurons, 5, density, seed=None)).to_directed()
        adjacency_list = topo.adjacency_list()


        # Pick the output neurons to be those with highest in degree
        in_deg = np.array([topo.in_degree(x) for x,_ in enumerate(adjacency_list)])
        self.output_neurons = np.argpartition(in_deg, -num_output)[-num_output:]
        print(self.output_neurons)
        print([topo.in_degree(x) for x in self.output_neurons])

        # Pick the input neurons to be those with highest out degree
        out_deg = np.array([topo.out_degree(x) if x not in self.output_neurons else -1
                            for x,_ in enumerate(adjacency_list)])
        self.input_neurons = np.argpartition(out_deg, -num_input)[-num_input:]

        # Output neurons do not fire out.
        for adjacent_neurons in adjacency_list:
            for out_neuron in self.output_neurons:
                if out_neuron in adjacent_neurons:
                    adjacent_neurons.remove(out_neuron)

        # Disconnect input -> output
        for out in self.output_neurons:
            for inp in self.input_neurons:
                if out in adjacency_list[inp]: adjacency_list[inp].remove(out)
                if inp in adjacency_list[out]: adjacency_list[out].remove(inp)


        for i, adjacent in enumerate(adjacency_list):
            if i not in self.input_neurons and i not in self.output_neurons:
                for n in adjacent:
                    if i in adjacency_list[n]:
                        if np.random.rand(1)>0.5:
                            adjacent.remove(n)
                        else:
                            adjacency_list[n].remove(i)

        # Let nothing enter the input neurons
        for inp in self.input_neurons:
            adjacency_list[inp] = []

        return adjacency_list
Example #14
def sort_by_relative_entropy(corpus, topicct, stemmer):
    # get the right file names for the corpus and count
    stemmed_weights = ['wordweights/' + fname for fname in os.listdir('wordweights')
            if fname.startswith('{}-{}-{}'.format(corpus, stemmer, topicct))]
    unstemmed_weights = ['wordweights/' + fname for fname in os.listdir('wordweights')
            if fname.startswith('{}-{}-{}'.format(corpus, UNSTEMMED_NAME, topicct))]
    stemmed_corpus_file = 'corpora/{}-train-{}-stopped.txt'.format(corpus, stemmer)
    unstemmed_corpus_file = 'corpora/{}-train-{}-stopped.txt'.format(corpus, UNSTEMMED_NAME)

    # get the mapping from unstemmed to stemmed words
    stemmed_to_unstemmed = defaultdict(set)
    unstemmed_counts = Counter()
    with open(stemmed_corpus_file) as f, open(unstemmed_corpus_file) as g:
        for stemmed_line in f:
            stemmed_words = stemmed_line.split()[3:]
            unstemmed_words = g.readline().split()[3:]
            assert(len(stemmed_words) == len(unstemmed_words))
            for uword, sword in zip(unstemmed_words, stemmed_words):
                stemmed_to_unstemmed[sword].add(uword)
                unstemmed_counts[uword] += 1

    # for each file; for each word; get the entropy
    stemmed_entropies = defaultdict(list)
    unstemmed_entropies = defaultdict(list)
    for file in stemmed_weights:
        entropy_dict = get_stemmed_entropy_per_word(file)
        for k, v in entropy_dict.iteritems():
            stemmed_entropies[k].append(v)
    for file in unstemmed_weights:
        entropy_dict = get_unstemmed_entropy_per_word(file, stemmed_to_unstemmed, int(topicct))
        for k, v in entropy_dict.iteritems():
            unstemmed_entropies[k].append(v)

    # compute difference of average entropies
    stemmed_vocab = [sword for sword, uwords in stemmed_to_unstemmed.iteritems() if len(uwords) > 1]
    entropy_diffs = np.zeros(len(stemmed_vocab))
    for i, sword in enumerate(stemmed_vocab):
        entropy_diffs[i] = np.mean(stemmed_entropies[sword]) - np.mean(unstemmed_entropies[sword])

    # find top 50 maximum and minimum entropies
    min_indices = np.argpartition(entropy_diffs, 50)[:50]
    max_indices = np.argpartition(entropy_diffs, -50)[-50:]
    with open('wordlists/{}-{}-{}.txt'.format(corpus, stemmer, topicct), 'w') as wf:
        wf.write('Lowest entropy differences (stemmed is better)\n')
        for i in min_indices:
            wf.write('{}\t{}\t{}\n'.format(entropy_diffs[i], stemmed_vocab[i], ' '.join(stemmed_to_unstemmed[stemmed_vocab[i]])))
        wf.write('Highest entropy differences (unstemmed is better)\n')
        for i in max_indices:
            wf.write('{}\t{}\t{}\n'.format(entropy_diffs[i], stemmed_vocab[i], ' '.join(stemmed_to_unstemmed[stemmed_vocab[i]])))
Example #15
def disp_results(fig, ax1, ax2, loss_iterations, losses, accuracy_iterations, accuracies, accuracies_iteration_checkpoints_ind, fileName, color_ind=0):
    modula = len(plt.rcParams['axes.color_cycle'])
    acrIterations =[]
    top_acrs={}
    if accuracies.size:
        if accuracies.size > 4:
            top_n = 4
        else:
            top_n = accuracies.size - 1
        temp = np.argpartition(-accuracies, top_n)
        result_indexces = temp[:top_n]
        temp = np.partition(-accuracies, top_n)
        result = -temp[:top_n]
        for acr in result_indexces:
            acrIterations.append(accuracy_iterations[acr])
            top_acrs[str(accuracy_iterations[acr])]=str(accuracies[acr])

        sorted_top4 = sorted(top_acrs.items(), key=operator.itemgetter(1))
        maxAcc = np.amax(accuracies, axis=0)
        iterIndx = np.argmax(accuracies)
        maxAccIter = accuracy_iterations[iterIndx]
        maxIter =   accuracy_iterations[-1]
        consoleInfo = format('\n[%s]:maximum accuracy [from 0 to %s ] = [Iteration %s]: %s ' %(fileName,maxIter,maxAccIter ,maxAcc))
        plotTitle = format('max accuracy(%s) [Iteration %s]: %s ' % (fileName,maxAccIter, maxAcc))
        print (consoleInfo)
        #print (str(result))
        #print(acrIterations)
       # print 'Top 4 accuracies:'		
        print ('Top 4 accuracies:'+str(sorted_top4))		
        plt.title(plotTitle)
    ax1.plot(loss_iterations, losses, color=plt.rcParams['axes.color_cycle'][(color_ind * 2 + 0) % modula])
    ax2.plot(accuracy_iterations, accuracies, plt.rcParams['axes.color_cycle'][(color_ind * 2 + 1) % modula], label=str(fileName))
    ax2.plot(accuracy_iterations[accuracies_iteration_checkpoints_ind], accuracies[accuracies_iteration_checkpoints_ind], 'o', color=plt.rcParams['axes.color_cycle'][(color_ind * 2 + 1) % modula])
    plt.legend(loc='lower right') 
Example #16
 def get_features(self, _input):
     d = self.prototypes - _input
     d = np.sqrt(sum(d.T ** 2))  # get Euclidian distance
     indexes = np.argpartition(d, self.c, axis=0)[:self.c]
     phi = np.zeros(self.numPrototypes)
     phi[indexes] = 1
     return phi
Example #17
    def _kneighbors_reduce_func(self, dist, start,
                                n_neighbors, return_distance):
        """Reduce a chunk of distances to the nearest neighbors

        Callback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`

        Parameters
        ----------
        dist : array of shape (n_samples_chunk, n_samples)
        start : int
            The index in X which the first row of dist corresponds to.
        n_neighbors : int
        return_distance : bool

        Returns
        -------
        dist : array of shape (n_samples_chunk, n_neighbors), optional
            Returned only if return_distance
        neigh : array of shape (n_samples_chunk, n_neighbors)
        """
        sample_range = np.arange(dist.shape[0])[:, None]
        neigh_ind = np.argpartition(dist, n_neighbors - 1, axis=1)
        neigh_ind = neigh_ind[:, :n_neighbors]
        # argpartition doesn't guarantee sorted order, so we sort again
        neigh_ind = neigh_ind[
            sample_range, np.argsort(dist[sample_range, neigh_ind])]
        if return_distance:
            if self.effective_metric_ == 'euclidean':
                result = np.sqrt(dist[sample_range, neigh_ind]), neigh_ind
            else:
                result = dist[sample_range, neigh_ind], neigh_ind
        else:
            result = neigh_ind
        return result
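The partition-then-sort pattern in this reduce function (argpartition to find the n_neighbors smallest in linear time, then an argsort restricted to the selected columns) can be illustrated standalone; this is a sketch on random data, not scikit-learn code:

import numpy as np

rng = np.random.default_rng(0)
dist = rng.random((4, 10))              # a chunk of 4 query rows vs. 10 samples
n_neighbors = 3

sample_range = np.arange(dist.shape[0])[:, None]
neigh_ind = np.argpartition(dist, n_neighbors - 1, axis=1)[:, :n_neighbors]
neigh_ind = neigh_ind[sample_range, np.argsort(dist[sample_range, neigh_ind])]
print(dist[sample_range, neigh_ind])    # each row's 3 smallest distances, ascending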
Example #18
File: matutils.py Project: lopusz/gensim
def argsort(x, topn=None, reverse=False):
    """Get indices of the `topn` smallest elements in array `x`.

    Parameters
    ----------
    x : array_like
        Array to sort.
    topn : int, optional
        Number of indices of the smallest (greatest) elements to be returned if given;
        otherwise, indices of all elements will be returned in ascending (descending) order.
    reverse : bool, optional
        If True - return the `topn` greatest elements, in descending order.

    Returns
    -------
    numpy.ndarray
        Array of `topn` indices that sort the array in the required order.

    """
    x = np.asarray(x)  # unify code path for when `x` is not a np array (list, tuple...)
    if topn is None:
        topn = x.size
    if topn <= 0:
        return []
    if reverse:
        x = -x
    if topn >= x.size or not hasattr(np, 'argpartition'):
        return np.argsort(x)[:topn]
    # np >= 1.8 has a fast partial argsort, use that!
    most_extreme = np.argpartition(x, topn)[:topn]
    return most_extreme.take(np.argsort(x.take(most_extreme)))  # resort topn into order
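A small usage sketch of this helper (assuming numpy is imported as np and the function above is defined):

x = [5, 2, 9, 1, 7]
print(argsort(x, topn=2))                  # [3 1]: indices of the two smallest
print(argsort(x, topn=2, reverse=True))    # [2 4]: indices of the two largest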
Example #19
def computeRanks(composedSpace, observedSpace):
    """Ranks all the representations in the composed space with respect to 
    the representations in the observed space. Cut-off value 1000"
    """
    ranks = {}
    rankList = []

    composedWords = set(composedSpace.get_id2row())
    observedWords = observedSpace.get_id2row()
    neighbours = 1000

    for w_idx, word in enumerate(composedWords):
        vector = composedSpace.get_row(word)
        Y = 1 - cdist(vector.mat, observedSpace.get_cooccurrence_matrix().mat, 'cosine')
        nearest = Y.argmax()
        nearest_k_indices = np.argpartition(Y, tuple([-p for p in range(neighbours)]), axis=None)[-neighbours:]
        # pp([(observedWords[idx], Y[0][idx]) for idx in reversed(nearest_k_indices)])
        words = [observedWords[idx] for idx in reversed(nearest_k_indices)]
        wordRanks = {word:index+1 for index,word in enumerate(words)}
        # print(wordRanks)

        if (word in wordRanks):
            r = wordRanks[word]
            ranks[word] = r
            rankList.append(r)

        else:
            ranks[word] = 1000
            rankList.append(1000)

        if ((w_idx > 0) and (w_idx % 100 == 0)):
            print(w_idx)

    return rankList, ranks
Example #20
	def similar_movies(self, weights, base_movie, movies = None, n = 6):
		""" Gets the n similar movies to a base movie. """
		fv = self.features(base_movie, movies = movies)
		wv = weights.reshape((weights.shape[1],1))
		scores = fv.dot(wv)
		inds = np.argpartition(scores,-n, axis = 0)[-n:].reshape(n)
		return [self.movie_indices[i] for i in inds]
Example #21
def pspace(h1e, eri, norb, nelec, hdiag, np=400):
    '''pspace Hamiltonian to improve Davidson preconditioner. See, CPL, 169, 463
    '''
    neleca, nelecb = _unpack_nelec(nelec)
    h1e = numpy.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    nb = cistring.num_strings(norb, nelecb)
    if hdiag.size < np:
        addr = numpy.arange(hdiag.size)
    else:
        try:
            addr = numpy.argpartition(hdiag, np-1)[:np]
        except AttributeError:
            addr = numpy.argsort(hdiag)[:np]
    addra, addrb = divmod(addr, nb)
    stra = numpy.array([cistring.addr2str(norb,neleca,ia) for ia in addra],
                       dtype=numpy.uint64)
    strb = numpy.array([cistring.addr2str(norb,nelecb,ib) for ib in addrb],
                       dtype=numpy.uint64)
    np = len(addr)
    h0 = numpy.zeros((np,np))
    libfci.FCIpspace_h0tril(h0.ctypes.data_as(ctypes.c_void_p),
                            h1e.ctypes.data_as(ctypes.c_void_p),
                            eri.ctypes.data_as(ctypes.c_void_p),
                            stra.ctypes.data_as(ctypes.c_void_p),
                            strb.ctypes.data_as(ctypes.c_void_p),
                            ctypes.c_int(norb), ctypes.c_int(np))

    for i in range(np):
        h0[i,i] = hdiag[addr[i]]
    h0 = lib.hermi_triu(h0)
    return addr, h0
Example #22
File: metric.py Project: j-dr/bigbrother
    def splitBimodal(self, x, y, largepoly=30):
        p = np.polyfit(x, y, largepoly) # polynomial coefficients for fit

        extrema = np.roots(np.polyder(p))
        extrema = extrema[np.isreal(extrema)]
        extrema = extrema[(extrema - x[1]) * (x[-2] - extrema) > 0] # exclude the endpoints due to false maxima during fitting
        try:
            root_vals = [sum([p[::-1][i]*(root**i) for i in range(len(p))]) for root in extrema]
            peaks = extrema[np.argpartition(root_vals, -2)][-2:] # find two peaks of bimodal distribution

            mid, = np.where((x - peaks[0]) * (peaks[1] - x) > 0)  # want data points between the peaks
        except:
            warnings.warn("Peak finding failed!")
            return None

        try:
            p_mid = np.polyfit(x[mid], y[mid], 2) # fit middle section to a parabola
            midpoint = np.roots(np.polyder(p_mid))[0]
        except:
            warnings.warn("Polynomial fit between peaks of distribution poorly conditioned. Falling back on using the minimum! May result in inaccurate split determination.")
            if len(mid) == 0:
                return None

            midx = np.argmin(y[mid])
            midpoint = x[mid][midx]

        return midpoint
Example #23
 def nearest(self,wrd,N=10):
     wrd_vec_norm=self.w_to_normv(wrd)
     if wrd_vec_norm is None:
         return
     sims=self.vectors.dot(wrd_vec_norm)/self.norm_constants #cosine similarity to all other vecs
     #http://stackoverflow.com/questions/6910641/how-to-get-indices-of-n-maximum-values-in-a-numpy-array
     return sorted(((sims[idx],self.words[idx]) for idx in numpy.argpartition(sims,-N-1)[-N-1:]), reverse=True)[1:]
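A standalone sketch of the same idea with random unit vectors (not the project's embeddings): cosine similarity via a dot product of pre-normalized rows, then the top N+1 taken with argpartition so the query word itself can be dropped after sorting.

import numpy

rng = numpy.random.default_rng(0)
vectors = rng.normal(size=(1000, 50))
vectors /= numpy.linalg.norm(vectors, axis=1, keepdims=True)     # pre-normalized rows
query_idx, N = 42, 10
sims = vectors.dot(vectors[query_idx])                           # cosine similarities
top = numpy.argpartition(sims, -N - 1)[-N - 1:]                  # N+1 best, unsorted
nearest = sorted(((sims[i], i) for i in top), reverse=True)[1:]  # drop the query itself
print([i for _, i in nearest])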
Example #24
def cluster_newsgroups():
    """ Cluster newsgroup categories. """

    from kmeans import KMeans
    from similarity import simMatrix

    corpus, dictionary = build_dictionary(bigram=True)
    tfidf = TFIDF(dictionary)
    newsgroups = tfidf.vectorize(corpus)
    dictionary = tfidf.dictionary

    categories = sorted(corpus.keys())

    N = 6
    print "\n{}-Most Common Words".format(N)
    for index, category in enumerate(categories):
        nlargest = np.argpartition(newsgroups[index,:], -N)[-N:]
        nlargest = nlargest[np.argsort(newsgroups[index,nlargest])][::-1]
        print "{:>24} {}".format(category, dictionary[nlargest])
    print

    K = 3
    km = KMeans(n_clusters=K)
    km.fit(newsgroups)

    labels = km.labels_

    print "\nKMeans Label Assignment, K = {}".format(K)
    for category, label, in zip(categories, labels):
        print int(label), category

    simMatrix(newsgroups).plot().show()
def predict(clf):
    import numpy as np

    X = h5read('testX_sample_kmeans_3.h5', 'lid/test/X/sample_kmeans_3')

    #print("Data read.")

    yprob = clf.predict_proba(X)
    mp3db = h5read('testXmp3.h5', 'lid/test/X/mp3')
    ylabels = h5read('ydict.h5', 'lid/data/y/labels')
    ylang = h5read('ydict.h5', 'lid/data/y/lang')
    ydict = {k : v for k, v in zip(ylabels, ylang)}

    top_labels = np.zeros((yprob.shape[0], 3))

    for isamp in range(0, yprob.shape[0]):
        #best = np.argmax(yprob[isamp])
        #print best
        NTOP = 3
        top_indices = np.argpartition(yprob[isamp], -NTOP)[-NTOP:]
        top_probs = yprob[isamp][top_indices]
        order = np.argsort(top_probs)
        #print(top_indices)
        #print(top_probs)
        #print(order)
        print(mp3db[isamp] + ',' + ydict[top_indices[order[2]]] + ',1')
        print(mp3db[isamp] + ',' + ydict[top_indices[order[1]]] + ',2')
        print(mp3db[isamp] + ',' + ydict[top_indices[order[0]]] + ',3')

        pass

    pass
Example #26
    def query_with_distances(self, v, n):
        """Find indices of `n` most similar vectors from the index to query vector `v`."""
        if self._metric == 'hamming':
            v = numpy.packbits(v)

        if self._metric != 'jaccard':
            # use same precision for query as for index
            v = numpy.ascontiguousarray(v, dtype = self.index.dtype)

        # HACK we ignore query length as that's a constant not affecting the final ordering
        if self._metric == 'angular':
            # argmax_a cossim(a, b) = argmax_a dot(a, b) / |a||b| = argmin_a -dot(a, b)
            dists = -numpy.dot(self.index, v)
        elif self._metric == 'euclidean':
            # argmin_a (a - b)^2 = argmin_a a^2 - 2ab + b^2 = argmin_a a^2 - 2ab
            dists = self.lengths - 2 * numpy.dot(self.index, v)
        elif self._metric == 'hamming':
            diff = numpy.bitwise_xor(v, self.index)
            pc = BruteForceBLAS.popcount
            den = float(len(v) * 8)
            dists = [sum([pc[part] for part in point]) / den for point in diff]
        elif self._metric == 'jaccard':
            dists = [pd[self._metric]['distance'](v, e) for e in self.index]
        else:
            assert False, "invalid metric"  # shouldn't get past the constructor!
        nearest_indices = numpy.argpartition(dists, n)[:n]  # partition-sort by distance, get `n` closest
        indices = [idx for idx in nearest_indices if pd[self._metric]["distance_valid"](dists[idx])]
        def fix(index):
            ep = self.index[index]
            ev = v
            if self._metric == "hamming":
                ep = numpy.unpackbits(ep)
                ev = numpy.unpackbits(ev)
            return (index, pd[self._metric]['distance'](ep, ev))
        return map(fix, indices)
Example #27
def process_chunk(chunk, data, k, metric):
    d = cdist(chunk, data, metric=metric).astype('float32')
    p = np.argpartition(d, k).astype('int32')[:, :k]
    rows = np.arange(chunk.shape[0])[:, None]
    d = d[rows, p]
    i = np.argsort(d)
    return d[rows, i], p[rows, i]
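A possible way to call process_chunk on toy data (a sketch; assumes numpy and scipy's cdist are imported as in the function body):

import numpy as np

rng = np.random.default_rng(0)
data = rng.random((100, 8)).astype('float32')
chunk = data[:10]                              # query the first 10 rows against all data
dists, idx = process_chunk(chunk, data, k=5, metric='euclidean')
print(dists.shape, idx.shape)                  # (10, 5) (10, 5); each row sorted by distance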
Example #28
def argtopk(a_plus_idx, k, axis, keepdims):
    """ Chunk and combine function of argtopk

    Extract the indices of the k largest elements from a on the given axis.
    If k is negative, extract the indices of the -k smallest elements instead.
    Note that, unlike in the parent function, the returned elements
    are not sorted internally.
    """
    assert keepdims is True
    axis = axis[0]

    if isinstance(a_plus_idx, list):
        a_plus_idx = list(flatten(a_plus_idx))
        a = np.concatenate([ai for ai, _ in a_plus_idx], axis)
        idx = np.concatenate([broadcast_to(idxi, ai.shape)
                              for ai, idxi in a_plus_idx], axis)
    else:
        a, idx = a_plus_idx

    if abs(k) >= a.shape[axis]:
        return a_plus_idx

    idx2 = np.argpartition(a, -k, axis=axis)
    k_slice = slice(-k, None) if k > 0 else slice(-k)
    idx2 = idx2[tuple(k_slice if i == axis else slice(None)
                      for i in range(a.ndim))]
    return take_along_axis(a, idx2, axis), take_along_axis(idx, idx2, axis)
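A plain-numpy sketch of the same top-k-along-an-axis pattern (np.take_along_axis plays the role of the dask helper used above; toy data, not dask internals):

import numpy as np

a = np.array([[5, 1, 9, 3],
              [2, 8, 4, 7]])
k = 2                                          # two largest along the last axis
idx = np.argpartition(a, -k, axis=-1)[:, -k:]
print(np.take_along_axis(a, idx, axis=-1))     # each row's two largest values (order within the k not guaranteed)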
def get_largest(row, N=10):
    if N >= row.nnz:
        best = zip(row.data, row.indices)
    else:
        ind = numpy.argpartition(row.data, -N)[-N:]
        best = zip(row.data[ind], row.indices[ind])
    return sorted(best, reverse=True)
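A usage sketch for get_largest with a one-row sparse matrix (hypothetical data; assumes scipy is available and numpy is imported as in the snippet):

from scipy.sparse import csr_matrix

row = csr_matrix([[0.0, 0.3, 0.0, 0.9, 0.5]]).getrow(0)
print(get_largest(row, N=2))   # the two largest entries as (value, column) pairs, largest first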
def get_candidate_dynamic(texts, trainingset, cluster_size, file_name):
    vectorizer = cst_vectorizer.StemmedTfidfVectorizer(**param)
    texts_vec = vectorizer.fit_transform(texts)
    training_vec = vectorizer.transform(trainingset)
    from sklearn.metrics.pairwise import pairwise_distances
    # sim_matrix(i, j) is the distance between the ith array from X and the jth array from Y.
    # From scikit-learn: [‘cityblock’, ‘cosine’, ‘euclidean’, ‘l1’, ‘l2’, ‘manhattan’]. These metrics support sparse matrix inputs.
    sim_matrix = 1 - pairwise_distances(texts_vec, training_vec, metric="cosine")  # euclidean as well
    num_texts = texts_vec.shape[0]
    cluster_size = cluster_size - 1  # subtract 1 because each text itself is appended at the end, so only cluster_size-1 training texts need to be selected
    ind_clustered_tweets = np.zeros([num_texts, cluster_size], dtype=int)

    for i in range(0, num_texts):
        indx = np.argpartition(sim_matrix[i], -cluster_size)[-cluster_size:]
        ind_clustered_tweets[i] = indx

    trainingset = np.array(trainingset)
    clustered_texts = []
    extantion_content = []
    for i in range(0, num_texts):
        ind = ind_clustered_tweets[i]
        clustered_texts.append(texts[i] + ' ' + ' '.join(trainingset[ind]))
        extantion_content.append(' '.join(trainingset[ind]))

    import pickle
    # recommended values for file_name are 'neg' and 'pos'
    print('Test data merged with the training data is saved as *.p files in the ./data/extended_test/ folder')
    pickle.dump(clustered_texts, open("./data/extended_test_data/" + file_name+"_clustered_texts.p", "wb"))
    pickle.dump(extantion_content, open("./data/extended_test_data/" + file_name+"_extantion_content.p", "wb"))
# Executing the above function requires three kinds of variables
Example #31
def argmedian(x):
    return np.argpartition(x, len(x) // 2, axis=0)[len(x) // 2]
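A quick check of argmedian on a small array (assumes numpy imported as np; for even-length input it returns the index of the upper of the two middle elements):

x = np.array([7, 1, 5, 3, 9])
i = argmedian(x)
print(i, x[i])   # 2 5: index 2 holds the median value 5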
        img = img.convert('RGB')
        img = img.resize((IMG_SIZE, IMG_SIZE), Image.ANTIALIAS)
        img = np.array(list(img.getdata()), dtype='uint8')
        img = np.reshape(img, (IMG_SIZE, IMG_SIZE, 3))

        imgs.append(img)

    pred = sess.run(predictions, feed_dict={x: imgs})
    log_soft = sess.run(logits_soft, feed_dict={x: imgs})
    outputFeatureMap(imgs, conv2)

    # build the label map
    label_map = {}
    with open('signnames.csv') as f:
        r = csv.reader(f)
        next(r)
        for row in r:
            label, label_descrip = int(row[0]), row[1]
            label_map[label] = label_descrip

    # print out top 5 softmax probabilities with corresponding sign category
    final_pred = [label_map[i] for i in pred]
    for i in range(len(imgs)):
        index = np.argpartition(log_soft[i], -5)[-5:]
        ind_sort = index[np.argsort(log_soft[i][index])]
        ind_sort = ind_sort[::-1]
        top5_labels = [label_map[j] for j in ind_sort]
        print('%s --> %s --> %s -->%s' %
              (images[i], final_pred[i], log_soft[i][ind_sort], top5_labels))
        print('\n')
Example #33
File: utils.py Project: hldai/indec
def disp_topics(vocab, topics, n_words=10):
    import numpy as np
    for t in topics:
        idxs = np.argpartition(-t, range(n_words))[:n_words]
        print(' '.join([vocab[i] for i in idxs]))
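A usage sketch with toy data (hypothetical vocab and topic-word weights, not from the project):

import numpy as np

vocab = ['apple', 'banana', 'cherry', 'date', 'elderberry']
topics = np.array([[0.05, 0.40, 0.10, 0.30, 0.15],
                   [0.50, 0.10, 0.20, 0.10, 0.10]])
disp_topics(vocab, topics, n_words=3)   # prints the 3 highest-weight words per topic, best first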
Example #34
def find_critical(learner, data, n=5, k=5, random_state=0):
    """
    :param learner: argument-based learner to be tested
    :param data: learning data
    :param n: number of critical examples
    :param k: folds in cross-validation
    :param random_state: random state to be used in StratifiedKFold function
    :return: n most critical examples (with estimation of 'criticality')
    """
    # first get how problematic is each example (cross-validation)
    # E ... the difference between probability of predicted most probable class
    # and the probability of the example's class.
    # if example is correctly predicted or if example is already covered
    # by an argumented rule, E equals 0.
    # CV
    skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=random_state)
    problematic = np.zeros(len(data))
    problematic_rules = [[] for d in data]
    for learn_ind, test_ind in skf.split(data.X, data.Y):
        # move test_ind with arguments to learn_ind
        arg_ind = []
        if ARGUMENTS in data.domain:
            for t in test_ind:
                if data[t][ARGUMENTS] not in ("", "?"):
                    arg_ind.append(t)
        learn_ind = np.array(sorted(list(learn_ind) + arg_ind), dtype=int)
        test_ind = np.array([t for t in test_ind if t not in arg_ind],
                            dtype=int)
        learn = Table(data.domain, data[learn_ind])
        test = Table(data.domain, data[test_ind])

        classifier = learner(learn)
        rules = classifier.rule_list
        # eval rules on test data
        cov = coverage(rules, test)

        # for each test instance find out best covering rule from the same class
        best_covered = np.zeros(len(test))
        for ri, r in enumerate(rules):
            target = r.target_class == test.Y
            best_covered = np.maximum(best_covered,
                                      (cov[:, ri] & target) * r.quality)

        # compute how problematic each instance is ...
        probs = classifier(test, 1)
        for ti, t in enumerate(test_ind):
            # first check best rule, if same class, it can not be problematic
            d, p = test[ti], probs[ti]
            c = int(d.get_class())
            # find best rule covering this example (best_rule * prediction)
            problematic[t] = (1 - best_covered[ti]) * (1 - p[c])
            problematic_rules[t] = [
                r for ri, r in enumerate(rules) if cov[ti, ri]
            ]

    # compute Mahalanobis distance between instances
    dist_matrix = squareform(pdist(data.X, metric="seuclidean"))

    # criticality is a combination of how much is the instance problematic
    # and its distance to other problematic examples of the same class
    # for loop over classes
    vals = np.unique(data.Y.astype(dtype=int))
    k = int(np.ceil(n / len(vals)))
    crit_ind = []
    for i in vals:
        inst = (data.Y == i) & (problematic > 1e-6)
        inst_pos = np.where(inst)[0]
        wdist = dist_matrix[np.ix_(inst, inst)]
        # select k most problematic instances
        prob = problematic[inst]
        ind = np.argpartition(prob, -k)[-k:]
        centers = kmeans(wdist, prob, ind)
        for c in centers:
            crit_ind.append(inst_pos[c])

    # sort critical indices given problematicness
    crit_ind = sorted(crit_ind, key=lambda x: -problematic[x])

    return (crit_ind, problematic[crit_ind],
            [problematic_rules[i] for i in crit_ind])
Example #35
    def plot_pca(self,
                 plot_filename=None,
                 PCs=[1, 2],
                 plot_title='',
                 image_format=None,
                 log1p=False,
                 plotWidth=5,
                 plotHeight=10,
                 cols=None,
                 marks=None):
        """
        Plot the PCA of a matrix

        Returns the matrix of plotted values.
        """
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(plotWidth, plotHeight))

        # Filter
        m = self.matrix
        rvs = m.var(axis=1)
        if self.transpose:
            m = m[np.nonzero(rvs)[0], :]
            rvs = rvs[np.nonzero(rvs)[0]]
        if self.ntop > 0 and m.shape[0] > self.ntop:
            m = m[np.argpartition(rvs, -self.ntop)[-self.ntop:], :]
            rvs = rvs[np.argpartition(rvs, -self.ntop)[-self.ntop:]]

        # log2 (if requested)
        if self.log2:
            self.matrix = np.log2(self.matrix + 0.01)

        # Row center / transpose
        if self.rowCenter and not self.transpose:
            _ = self.matrix.mean(axis=1)
            self.matrix -= _[:, None]
        if self.transpose:
            m = m.T

        # Center and scale
        m2 = (m - np.mean(m, axis=0))
        m2 /= np.std(m2, axis=0, ddof=1)  # Use the unbiased std. dev.

        # SVD
        U, s, Vh = np.linalg.svd(
            m2, full_matrices=False,
            compute_uv=True)  # Is full_matrices ever needed?

        # % variance, eigenvalues
        eigenvalues = s**2
        variance = eigenvalues / float(np.max([1, m2.shape[1] - 1]))
        pvar = variance / variance.sum()

        # Weights/projections
        Wt = Vh
        if self.transpose:
            # Use the projected coordinates for the transposed matrix
            Wt = np.dot(m2, Vh.T).T

        if plot_filename is not None:
            n = n_bars = len(self.labels)
            if eigenvalues.size < n:
                n_bars = eigenvalues.size
            markers = itertools.cycle(
                matplotlib.markers.MarkerStyle.filled_markers)
            if cols is not None:
                colors = itertools.cycle(cols)
            else:
                colors = itertools.cycle(
                    plt.cm.gist_rainbow(np.linspace(0, 1, n)))

            if marks is not None:
                markers = itertools.cycle(marks)

            if image_format == 'plotly':
                self.plotly_pca(plot_filename, Wt, pvar, PCs, eigenvalues,
                                cols, plot_title)
            else:
                ax1.axhline(y=0, color="black", linestyle="dotted", zorder=1)
                ax1.axvline(x=0, color="black", linestyle="dotted", zorder=2)
                for i in range(n):
                    color = next(colors)
                    marker = next(markers)
                    if isinstance(color, np.ndarray):
                        color = pltcolors.to_hex(color, keep_alpha=True)
                    ax1.scatter(Wt[PCs[0] - 1, i],
                                Wt[PCs[1] - 1, i],
                                marker=marker,
                                color=color,
                                s=150,
                                label=self.labels[i],
                                zorder=i + 3)
                if plot_title == '':
                    ax1.set_title('PCA')
                else:
                    ax1.set_title(plot_title)
                ax1.set_xlabel('PC{} ({:4.1f}% of var. explained)'.format(
                    PCs[0], 100.0 * pvar[PCs[0] - 1]))
                ax1.set_ylabel('PC{} ({:4.1f}% of var. explained)'.format(
                    PCs[1], 100.0 * pvar[PCs[1] - 1]))
                lgd = ax1.legend(scatterpoints=1,
                                 loc='center left',
                                 borderaxespad=0.5,
                                 bbox_to_anchor=(1, 0.5),
                                 prop={'size': 12},
                                 markerscale=0.9)

                # Scree plot
                ind = np.arange(n_bars)  # the x locations for the groups
                width = 0.35  # the width of the bars

                if mpl.__version__ >= "2.0.0":
                    ax2.bar(2 * width + ind, eigenvalues[:n_bars], width * 2)
                else:
                    ax2.bar(width + ind, eigenvalues[:n_bars], width * 2)
                ax2.set_ylabel('Eigenvalue')
                ax2.set_xlabel('Principal Component')
                ax2.set_title('Scree plot')
                ax2.set_xticks(ind + width * 2)
                ax2.set_xticklabels(ind + 1)

                ax3 = ax2.twinx()
                ax3.axhline(y=1, color="black", linestyle="dotted")
                ax3.plot(width * 2 + ind, pvar.cumsum()[:n], "r-")
                ax3.plot(width * 2 + ind,
                         pvar.cumsum()[:n],
                         "wo",
                         markeredgecolor="black")
                ax3.set_ylim([0, 1.05])
                ax3.set_ylabel('Cumulative variability')

                plt.subplots_adjust(top=3.85)
                plt.tight_layout()
                plt.savefig(plot_filename,
                            format=image_format,
                            bbox_extra_artists=(lgd, ),
                            bbox_inches='tight')
                plt.close()

        return Wt, eigenvalues
Example #36
def path_matching_local(path: np.ndarray,
                        ego_position: np.ndarray,
                        consider_as_closed: bool = False,
                        s_tot: Union[float, None] = None,
                        no_interp_values: int = 11) -> tuple:
    """
    author:
    Alexander Heilmeier

    .. description::
    Get the corresponding s coordinate and the displacement of the own vehicle in relation to a local path.

    .. inputs::
    :param path:                Unclosed path used to match ego position ([s, x, y]).
    :type path:                 np.ndarray
    :param ego_position:        Ego position of the vehicle ([x, y]).
    :type ego_position:         np.ndarray
    :param consider_as_closed:  If the path is closed in reality we can interpolate between last and first point. This
                                can be enforced by setting consider_as_closed = True.
    :type consider_as_closed:   bool
    :param s_tot:               Total length of path in m.
    :type s_tot:                Union[float, None]
    :param no_interp_values:    Number of interpolation points that are created between the two closest points on the
                                path to obtain a more accurate result.
    :type no_interp_values:     int

    .. outputs::
    :return s_interp:           Interpolated s position of the vehicle in m.
    :rtype s_interp:            np.ndarray
    :return d_displ:            Estimated displacement from the trajectory in m.
    :rtype d_displ:             np.ndarray
    """

    # ------------------------------------------------------------------------------------------------------------------
    # CHECK INPUT ------------------------------------------------------------------------------------------------------
    # ------------------------------------------------------------------------------------------------------------------

    if path.shape[1] != 3:
        raise RuntimeError("Inserted path must have 3 columns [s, x, y]!")

    if consider_as_closed and s_tot is None:
        print("WARNING: s_tot is not handed into path_matching_local function! Estimating s_tot on the basis of equal"
              "stepsizes")
        s_tot = path[-1, 0] + path[1, 0] - path[0, 0]  # assume equal stepsize

    # ------------------------------------------------------------------------------------------------------------------
    # SELF LOCALIZATION ON RACELINE ------------------------------------------------------------------------------------
    # ------------------------------------------------------------------------------------------------------------------

    # get the nearest path point to ego position
    dists_to_cg = np.hypot(path[:, 1] - ego_position[0], path[:, 2] - ego_position[1])
    ind_min = np.argpartition(dists_to_cg, 1)[0]

    # get previous and following point on path
    if consider_as_closed:
        if ind_min == 0:
            ind_prev = dists_to_cg.shape[0] - 1
            ind_follow = 1

        elif ind_min == dists_to_cg.shape[0] - 1:
            ind_prev = ind_min - 1
            ind_follow = 0

        else:
            ind_prev = ind_min - 1
            ind_follow = ind_min + 1

    else:
        ind_prev = max(ind_min - 1, 0)
        ind_follow = min(ind_min + 1, dists_to_cg.shape[0] - 1)

    # get angle between selected point and neighbours: ang1 to previous point, ang2 to following point on path
    ang_prev = np.abs(trajectory_planning_helpers.angle3pt.angle3pt(path[ind_min, 1:3],
                                                                    ego_position,
                                                                    path[ind_prev, 1:3]))

    ang_follow = np.abs(trajectory_planning_helpers.angle3pt.angle3pt(path[ind_min, 1:3],
                                                                      ego_position,
                                                                      path[ind_follow, 1:3]))

    # extract neighboring points -> closest point and the point resulting in the larger angle
    if ang_prev > ang_follow:
        a_pos = path[ind_prev, 1:3]
        b_pos = path[ind_min, 1:3]
        s_curs = np.append(path[ind_prev, 0], path[ind_min, 0])
    else:
        a_pos = path[ind_min, 1:3]
        b_pos = path[ind_follow, 1:3]
        s_curs = np.append(path[ind_min, 0], path[ind_follow, 0])

    # adjust s if the closed path shall be considered and we have the case of interpolation between last and first point
    if consider_as_closed:
        if ind_min == 0 and ang_prev > ang_follow:
            s_curs[1] = s_tot
        elif ind_min == dists_to_cg.shape[0] - 1 and ang_prev <= ang_follow:
            s_curs[1] = s_tot

    # interpolate between those points (linear) for better positioning
    t_lin = np.linspace(0.0, 1.0, no_interp_values)  # set relative lengths that are evaluated for interpolation
    x_cg_interp = np.linspace(a_pos[0], b_pos[0], no_interp_values)
    y_cg_interp = np.linspace(a_pos[1], b_pos[1], no_interp_values)

    # get nearest of those interpolated points relative to ego position
    dists_to_cg = np.hypot(x_cg_interp - ego_position[0], y_cg_interp - ego_position[1])
    ind_min_interp = np.argpartition(dists_to_cg, 1)[0]
    t_lin_used = t_lin[ind_min_interp]

    # ------------------------------------------------------------------------------------------------------------------
    # CALCULATE REQUIRED INFORMATION -----------------------------------------------------------------------------------
    # ------------------------------------------------------------------------------------------------------------------

    # calculate current path length
    s_interp = np.interp(t_lin_used, (0.0, 1.0), s_curs)

    # get displacement between ego position and path (needed for lookahead distance)
    d_displ = dists_to_cg[ind_min_interp]

    return s_interp, d_displ
Example #37
File: demo.py Project: leo-lp/neon-1
print(ex_answer)

while True:
    # ask user for story and question
    story_lines = []
    line = input("\nPlease enter a story:\n")
    while line != "":
        story_lines.append(line)
        line = input()
    story = ("\n".join(story_lines)).strip()

    question = input("Please enter a question:\n")

    # convert user input into a suitable network input
    s = vectorize(story, babi.story_maxlen)
    q = vectorize(question, babi.query_maxlen)

    # get prediction probabilities with forward propagation
    probs = model_inference.fprop(x=(s, q), inference=True).get()

    # get top k answers
    top_k = -min(5, babi.vocab_size)
    max_indices = np.argpartition(probs, top_k, axis=0)[top_k:]
    max_probs = probs[max_indices]
    sorted_idx = max_indices[np.argsort(max_probs, axis=0)]

    print("\nAnswer:")
    for idx in reversed(sorted_idx):
        idx = int(idx)
        print(babi.index_to_word[idx], float(probs[idx]))
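The selection above is a generic top-k pattern; a minimal sketch on a toy probability vector (values made up) isolates the two steps: np.argpartition gathers the k largest entries in O(n), and a small argsort over just those k recovers their order.

import numpy as np

probs = np.array([0.05, 0.40, 0.10, 0.25, 0.20])  # toy values
k = 3
top_k = -k
max_indices = np.argpartition(probs, top_k)[top_k:]       # k largest, in arbitrary order
sorted_idx = max_indices[np.argsort(probs[max_indices])]  # ascending by probability
for idx in reversed(sorted_idx):                          # most probable first
    print(int(idx), float(probs[idx]))                    # prints 1 0.4, then 3 0.25, then 4 0.2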
X_train.shape, y_train.shape

param = [{
    'kernel': ['linear'],
    'C': [10., 30., 100., 300., 1000., 3000., 10000., 30000.0]
}, {
    'kernel': ['rbf'],
    'C': [10., 30., 100., 300., 1000., 3000., 10000., 30000.0],
    'gamma': [0.001, 0.01, 0.1, 1]
}]

grid = GridSearchCV(SVR(),
                    param,
                    scoring='neg_mean_squared_error',
                    verbose=2,
                    n_jobs=6,
                    cv=5)

grid.fit(X_train, y_train)
grid.best_params_

negative_mse = grid.best_score_
rmse = np.sqrt(-negative_mse)
rmse

grid.best_estimator_.coef_

a = np.array([3, 10, 5, 6, 4, 5])
a
np.sort(np.argpartition(a, -5)[-5:])
Example #39
0
File: predict.py Project: lopuhin/ru-lm
def argsort_k_largest(x, k):
    if k >= len(x):
        return np.argsort(x)[::-1]
    indices = np.argpartition(x, -k)[-k:]
    values = x[indices]
    return indices[np.argsort(-values)]
Example #40
0
def pq_knn(dist, topk):
    ids = np.argpartition(dist, topk - 1)[:topk]
    ids = ids[dist[ids].argsort()]
    return ids
Example #41
0
    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
        """Finds the K-neighbors of a point.

        Returns indices of and distances to the neighbors of each point.

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            The query time series.
            If not provided, neighbors of each indexed point are returned.
            In this case, the query point is not considered its own neighbor.
        n_neighbors : int
            Number of neighbors to get (default is the value passed to the
            constructor).
        return_distance : boolean, optional. Defaults to True.
            If False, distances will not be returned

        Returns
        -------
        dist : array
            Array representing the distance to points, only present if
            return_distance=True
        ind : array
            Indices of the nearest points in the population matrix.
        """
        self_neighbors = False
        if n_neighbors is None:
            n_neighbors = self.n_neighbors
        if X is None:
            X = self._X_fit
            self_neighbors = True
        if self.metric == "precomputed":
            full_dist_matrix = X
        else:

            if X.ndim == 2:  # sklearn-format case
                X = X.reshape((X.shape[0], -1, self._d))
                fit_X = self._X_fit.reshape(
                    (self._X_fit.shape[0], -1, self._d))
            elif hasattr(self, '_ts_fit') and self._ts_fit is not None:
                fit_X = self._ts_fit
            else:
                fit_X = self._X_fit

            if (self.metric in TSLEARN_VALID_METRICS
                    or self.metric in [cdist_dtw, cdist_soft_dtw, cdist_sax]):
                full_dist_matrix = self._precompute_cross_dist(X,
                                                               other_X=fit_X)
            elif self.metric in ["euclidean", "sqeuclidean", "cityblock"]:
                full_dist_matrix = scipy_cdist(X.reshape((X.shape[0], -1)),
                                               fit_X.reshape(
                                                   (fit_X.shape[0], -1)),
                                               metric=self.metric)
            else:
                raise ValueError("Unrecognized time series metric string: %s "
                                 "(should be one of 'dtw', 'softdtw', "
                                 "'sax', 'euclidean', 'sqeuclidean' "
                                 "or 'cityblock')" % self.metric)

        # Code similar to sklearn (sklearn/neighbors/base.py), to make sure
        # that TimeSeriesKNeighbor~(metric='euclidean') has the same results as
        # feeding a distance matrix to sklearn.KNeighbors~(metric='euclidean')
        kbin = min(n_neighbors - 1, full_dist_matrix.shape[1] - 1)
        # argpartition will make sure the first `kbin` entries are the
        # `kbin` smallest ones (but in arbitrary order) --> complexity: O(n)
        ind = numpy.argpartition(full_dist_matrix, kbin, axis=1)

        if self_neighbors:
            ind = ind[:, 1:]
        if n_neighbors > full_dist_matrix.shape[1]:
            n_neighbors = full_dist_matrix.shape[1]
        ind = ind[:, :n_neighbors]

        n_ts = X.shape[0]
        sample_range = numpy.arange(n_ts)[:, None]
        # Sort the `kbin` nearest neighbors according to distance
        ind = ind[sample_range,
                  numpy.argsort(full_dist_matrix[sample_range, ind])]
        dist = full_dist_matrix[sample_range, ind]

        if hasattr(self, '_ts_metric'):
            self.metric = self._ts_metric

        if return_distance:
            return dist, ind
        else:
            return ind
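The argpartition-then-argsort selection used above, isolated on a toy precomputed distance matrix (numbers made up): only the first kbin+1 candidate columns per row are sorted, which is what keeps the per-query cost close to O(n).

import numpy as np

# toy 3x4 distance matrix (rows: queries, columns: reference series)
full_dist_matrix = np.array([[0.9, 0.1, 0.5, 0.3],
                             [0.2, 0.8, 0.4, 0.6],
                             [0.7, 0.3, 0.1, 0.9]])
n_neighbors = 2
kbin = min(n_neighbors - 1, full_dist_matrix.shape[1] - 1)

# the first kbin+1 entries of each row are the smallest distances, in arbitrary order
ind = np.argpartition(full_dist_matrix, kbin, axis=1)[:, :n_neighbors]

# sort only those candidates by their actual distance
sample_range = np.arange(full_dist_matrix.shape[0])[:, None]
ind = ind[sample_range, np.argsort(full_dist_matrix[sample_range, ind])]
dist = full_dist_matrix[sample_range, ind]
print(ind)   # [[1 3] [0 2] [2 1]]
print(dist)  # [[0.1 0.3] [0.2 0.4] [0.1 0.3]]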
Example #42
0
def indices_of_top_k(arr, k):
    return np.sort(np.argpartition(np.array(arr), -k)[-k:])
Example #43
0
def getMinOfNum(a, K):
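    # note: despite the name, this returns the indices of the K largest values (in arbitrary order)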
    a = np.array(a)
    return np.argpartition(a, -K)[-K:]
Example #44
0
                                training: False
                            })
        los += r_l.sum()
        s += res
    los = los / x_test.shape[0]
    s = s / x_test.shape[0]
    print(" Test Accuracy: ", s)
    summ4 = sess.run(loss_test_summary, feed_dict={read_loss_test: los})
    summ_writer.add_summary(summ4, ep)

    summ5 = sess.run(val_acc_summary, feed_dict={read_val_acc: s})
    summ_writer.add_summary(summ5, ep)

    # Swapping samples
    if swapped != 0:
        print("Swapping ", swapped, " samples.")
        ind_batch_low = np.argpartition(full_batch_losses, swapped)

        ind_ma_high = np.argpartition(EMA_batch_losses, N - swapped)

        batch_low_swap = ind_batch_low[swapped:]

        ma_high_swap = ind_ma_high[-swapped:]

        indices = np.concatenate((batch_low_swap, ma_high_swap))

    # Optional if you want to change the number of swapped samples during training
    # swapped += int(9000 / epochs)

    np.random.shuffle(indices)
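A compact sketch of the swap selection above on toy loss arrays (names and values are made up): the `swapped` lowest-loss samples are dropped from the current pool and replaced by the `swapped` samples with the highest moving-average loss.

import numpy as np

full_batch_losses = np.array([0.9, 0.1, 0.5, 0.3, 0.7, 0.2])  # toy values
EMA_batch_losses = np.array([0.4, 0.9, 0.3, 0.2, 0.1, 0.8])   # toy values
N = full_batch_losses.shape[0]
swapped = 2

ind_batch_low = np.argpartition(full_batch_losses, swapped)   # first `swapped` entries: lowest current losses
ind_ma_high = np.argpartition(EMA_batch_losses, N - swapped)  # last `swapped` entries: highest EMA losses

batch_low_swap = ind_batch_low[swapped:]  # keep everything except the `swapped` easiest samples
ma_high_swap = ind_ma_high[-swapped:]     # bring in the `swapped` hardest samples by EMA loss

indices = np.concatenate((batch_low_swap, ma_high_swap))
np.random.shuffle(indices)
print(np.sort(indices))  # all six toy indices, with 1 and 5 re-selected via their EMA loss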
    def translate_sequence_beam(self, input_seq, beam_size=1):
        # https://machinelearningmastery.com/beam-search-decoder-natural-language-processing/
        # Encode the input as state vectors.
        states_value = self.encoder_model.predict(input_seq)

        # Generate empty target sequence of length 1.
        target_seq = np.zeros((1, 1))

        # only one candidate at the beginning
        candidates = [
            Candidate(target_seq=target_seq, last_prediction=SpecialSymbols.GO_IX, states_value=states_value, score=0,
                      decoded_sentence="")
        ]

        while True:
            should_stop = True
            new_candidates = []
            for candidate in candidates:
                if not candidate.finalised:
                    outputs = self.decoder_model.predict(
                        [candidate.target_seq] + candidate.states_value)
                    should_stop = False

                    output_tokens = outputs[0][-1]

                    # find n (beam_size) best predictions
                    indices = np.argpartition(output_tokens, -beam_size)[-beam_size:]

                    for sampled_token_index in indices:
                        score = -math.log(output_tokens[sampled_token_index])
                        # how long is the sentence, to compute average score
                        step = candidate.get_sentence_length() + 1

                        # log probabilities are summed because the score is in log space: https://stats.stackexchange.com/questions/121257/log-probability-vs-product-of-probabilities
                        # the score is the average over all tokens (a normalization so that longer sequences are not penalized)
                        # incremental average: https://math.stackexchange.com/questions/106700/incremental-averageing
                        avg_score = utils.incremental_average(candidate.score, score, step)

                        sampled_word = self.target_vocab.ix_to_word[sampled_token_index]

                        new_candidate = Candidate(target_seq=candidate.target_seq,
                                                  states_value=states_value,
                                                  decoded_sentence=candidate.decoded_sentence,
                                                  score=avg_score,
                                                  sampled_word=sampled_word, last_prediction=sampled_token_index)
                        new_candidates.append(new_candidate)

                        # Exit condition: either hit max length
                        # or find stop character.
                        if sampled_word == SpecialSymbols.EOS:
                            continue

                        decoded_len = new_candidate.get_sentence_length()

                        if decoded_len > self.training_dataset.y_max_seq_len \
                                and decoded_len > self.test_dataset.y_max_seq_len:
                            new_candidate.finalise()
                            continue

                # finished candidates are transferred to new_candidates automatically
                else:
                    new_candidates.append(candidate)

            # take n (beam_size) best candidates
            candidates = sorted(new_candidates, key=lambda can: can.score)[:beam_size]

            if should_stop:
                break

        return candidates[0].decoded_sentence
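A minimal sketch of the per-step expansion above on a toy next-token distribution (numbers made up); the running-average update below stands in for utils.incremental_average, which is assumed here to compute a standard incremental mean.

import math
import numpy as np

output_tokens = np.array([0.05, 0.30, 0.10, 0.25, 0.20, 0.10])  # toy softmax output
beam_size = 3
prev_score, step = 0.7, 4  # toy running average score and current sentence length

# beam_size most probable next tokens, in arbitrary order
indices = np.argpartition(output_tokens, -beam_size)[-beam_size:]

for ix in indices:
    score = -math.log(output_tokens[ix])
    # incremental average: new_avg = old_avg + (value - old_avg) / step
    avg_score = prev_score + (score - prev_score) / step
    print(int(ix), round(avg_score, 3))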
Example #46
0
 def top_elements(array, k):
     ind = np.argpartition(array, -k)[-k:]
     return ind[np.argsort(array[ind])][::-1]
print('Recall: ', recall, '\n')

''' Part 3 '''
print('Part 3 - k-NN Classifier \n')

# Variables to keep track of result from each step
confusion_steps = []
accuracy_steps = []

# Perform knn classifier
print('K Value - Accuracy')
for i in range(1, 25):
    confusion = [[0, 0], [0, 0]]
    for j in range(len(test_data)):
        distances = np.array(np.power(abs(train_data.sub(np.array(np.array(test_data.loc[[j], :])[0]))), 2).sum(axis=1))
        closest_neighbours = np.array([train_output[j] for j in np.argpartition(distances, i)[:i]])
        pred_value = 1 if closest_neighbours.mean() > 0.5 else 0
        true_value = test_output[j]
        if pred_value == 1:
            if pred_value == true_value:
                confusion[0][0] += 1
            else:
                confusion[0][1] += 1
        if pred_value == 0:
            if pred_value == true_value:
                confusion[1][1] += 1
            else:
                confusion[1][0] += 1
    tp = confusion[0][0]
    fp = confusion[0][1]
    fn = confusion[1][0]
Example #48
0
img1 = cv2.imread(i1)
img2 = cv2.imread(i2)

# Calculate Optical Flow
h_oflow, v_oflow = calc_optical_flow(img1, img2)

# Make copies of the optical flow to play with
pof_h = np.copy(h_oflow)
pof_v = np.copy(v_oflow)

# Find the magnitudes of movement given the h and v oflows mag(x,y) = sqrt( (h_oflow^2) + (v_oflow^2) )
magnitudes = np.sqrt((h_oflow)**2 + (v_oflow)**2)

# Find the top N locations of magnitude
N = 20
indices = np.argpartition(magnitudes.flatten(), -N)[-N:]
locs = np.vstack(np.unravel_index(indices, magnitudes.shape)).T

print "Perturbing at: ", locs

# Apply the N perturbations to optical flow field
for loc in locs:
    row = loc[0]
    col = loc[1]
    pof_h[row, col] *= -1
    pof_v[row, col] *= -1

# Reverse the optical flow perturbations onto two adversarial spatial images
pimg1, pimg2 = perturbed_oflow_to_images(img1, img2, pof_h, pof_v, locs)

# Recalculate optical flow on adversarial spatial images
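The top-N lookup above, isolated on a toy magnitude image (values made up): argpartition on the flattened array finds the N largest magnitudes in O(n), and unravel_index maps the flat indices back to (row, col) coordinates.

import numpy as np

magnitudes = np.array([[0.1, 0.9, 0.2],
                       [0.8, 0.3, 0.7],
                       [0.4, 0.6, 0.5]])  # toy values
N = 3
indices = np.argpartition(magnitudes.flatten(), -N)[-N:]
locs = np.vstack(np.unravel_index(indices, magnitudes.shape)).T
print(locs)  # (row, col) of the 3 largest magnitudes: (0, 1), (1, 0), (1, 2) in some order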
            end_idx = min((batch_idx + 1) * args.ts_batch_size, nr_tst_num)
            X = X_tst[start_idx:end_idx]
            Y = Y_tst_o[start_idx:end_idx]
            data = Variable(torch.from_numpy(X).long()).cuda()

            candidates = baseline(data)
            candidates = candidates.data.cpu().numpy()

            Y_pred = np.zeros([candidates.shape[0], args.num_classes])
            for i in range(candidates.shape[0]):
                candidate_labels = candidates[i, :].argsort()[-args.re_ranking:][::-1].tolist()
                _, activations_2nd = capsule_net(data[i, :].unsqueeze(0), candidate_labels)
                Y_pred[i, candidate_labels] = activations_2nd.squeeze(2).data.cpu().numpy()

            for i in range(Y_pred.shape[0]):
                sorted_idx = np.argpartition(-Y_pred[i, :], top_k)[:top_k]
                row_idx_list += [i + start_idx] * top_k
                col_idx_list += (sorted_idx).tolist()
                val_idx_list += Y_pred[i, sorted_idx].tolist()

            done = time.time()
            elapsed = done - start

            print("\r Epoch: {} Reranking: {} Iteration: {}/{} ({:.1f}%)  Loss: {:.5f} {:.5f}".format(
                  (epoch + 1), args.re_ranking, batch_idx, nr_batches,
                  batch_idx * 100 / nr_batches,
                  0, elapsed),
                  end="")

        m = max(row_idx_list) + 1
        n = max(k_trn, k_tst)
Example #50
0
File: Valuate.py Project: zwcdp/HCA
def fun_idxs_of_max_n_score(user_scores_to_all_items, top_k):
    # find the indices of the top_k largest values in a vector
    return np.argpartition(user_scores_to_all_items, -top_k)[-top_k:]
Example #51
0
File: HW3_Cart2.py Project: enterlina/ML
def find_max_ind(auc_ind):
    auc_ind = np.array(auc_ind)
    ind = np.argpartition(auc_ind, -3)[-3:]
    return ind, auc_ind[ind]
Example #52
0
def embed_out_of_sample(X_train, X_manifold, X_out, K, beta, neighbor_measure):
    """
    ******************************************************************
        *
        *  Func:    embed_out_of_sample(X_train, X_manifold, X_out, K, beta, neighbor_measure)
        *
        *  Desc:    Embeds out-of-sample points into lower-dimensional space.
        *           Uses a k-nearest neighbor, constrained least square reconstruction.
        *
        *  Inputs:
        *           X_train - NxD matrix of training data coordinates
        *
        *           X_manifold - NxK matrix of low-dimensional training data coordinates
        *
        *           X_out - MxD data matrix of out-of-sample points
        *
        *           K - dimensionality of embedding space
        *
        *           beta - bandwidth of RBF affinity function
        *
        *           neighbor_measure - number of neighbors to consider in k-NN graph
        *          
        *  Outputs:
        *           Z_out - MxK data matrix of embedded out of sample points
        * 
    ******************************************************************
    """
    
    print("\nEmbedding out of sample data...")
    
    ## Extract constants
    num_total = np.shape(X_train)[0] ## Number of training data points
    num_out_sample = np.shape(X_out)[0] ## Number of out-of-sample-data-points
    input_dim = np.shape(X_out)[1] ## Dimensionality of input space
    
    Z_out = np.zeros((num_out_sample,K)) ## Initialize out of sample embedded coordinate matrix
    
    ##### Affinity of out-of-sample with training set #####
    print("Computing affinity matrices...")
    
    ## Define K-nearest neighbor graph
    W_L2 = distance_matrix(X_out, X_train, p=2)
    W_neighbors = W_L2
    
    ## Square L2 distances, divide by negative bandwidth and exponentiate
    W_total = np.exp((-1/beta)*(W_L2**2))
    print("Embedding out-of-sample points...")
    for idx in range(0,num_out_sample):
        temp_row = W_neighbors[idx, :]
        
        ## indices of nearest neighbors according to L2 distance
        valid_ind = np.argpartition(temp_row, neighbor_measure) 
        
        ##### Find reconstruction weights of the current out-of-sample point (no bias) #####
        X_recon = X_train[valid_ind[0:neighbor_measure],:].T
        x_current = X_out[idx,:]
        x_current= x_current.astype(np.double)
        X_recon = X_recon.astype(np.double)
        w_recon = unmix_cvxopt(np.expand_dims(x_current, axis=1), X_recon, gammaConst=0, P=None)
        w_recon = np.squeeze(w_recon)
        
        ## Embed sample as reconstruction of low-dimensional training data embeddings
        Z_recon = X_manifold[valid_ind[0:neighbor_measure],:].T
        z = np.dot(Z_recon, w_recon)
        
        Z_out[idx,:] = z
        
    print('Done!')
           
    return Z_out
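A minimal sketch of the per-point step above on random toy data; the constrained solver unmix_cvxopt is replaced by an ordinary least-squares fit (np.linalg.lstsq), which is only a stand-in and drops the constraints of the original.

import numpy as np
from scipy.spatial import distance_matrix

rng = np.random.default_rng(0)
X_train = rng.normal(size=(50, 5))     # toy high-dimensional training data
X_manifold = rng.normal(size=(50, 2))  # toy low-dimensional coordinates of the same points
x_out = rng.normal(size=(1, 5))        # one out-of-sample point
neighbor_measure = 4

# nearest training neighbors of the out-of-sample point (argpartition -> O(n))
d = distance_matrix(x_out, X_train, p=2)[0]
valid_ind = np.argpartition(d, neighbor_measure)[:neighbor_measure]

# reconstruction weights (unconstrained least squares as a stand-in for unmix_cvxopt)
X_recon = X_train[valid_ind].T  # D x k
w_recon, *_ = np.linalg.lstsq(X_recon, x_out[0], rcond=None)

# embed as the same weighted combination of the neighbors' low-dimensional coordinates
z = X_manifold[valid_ind].T @ w_recon
print(z)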
Example #53
0
def fitPlanesPiecewise(image, depth, normal, info, numOutputPlanes=20, imageIndex=1, parameters={}):
    if 'meanshift' in parameters and parameters['meanshift'] > 0:
        import sklearn.cluster
        meanshift = sklearn.cluster.MeanShift(parameters['meanshift'])
        pass
    
    from pylsd import lsd
    
    height = depth.shape[0]
    width = depth.shape[1]

    camera = getCameraFromInfo(info)
    urange = (np.arange(width, dtype=np.float32) / (width) * (camera['width']) - camera['cx']) / camera['fx']
    urange = urange.reshape(1, -1).repeat(height, 0)
    vrange = (np.arange(height, dtype=np.float32) / (height) * (camera['height']) - camera['cy']) / camera['fy']
    vrange = vrange.reshape(-1, 1).repeat(width, 1)
    
    X = depth * urange
    Y = depth
    Z = -depth * vrange


    normals = normal.reshape((-1, 3))
    normals = normals / np.maximum(np.linalg.norm(normals, axis=-1, keepdims=True), 1e-4)
    validMask = np.logical_and(np.linalg.norm(normals, axis=-1) > 1e-4, depth.reshape(-1) > 1e-4)
    
    points = np.stack([X, Y, Z], axis=2).reshape(-1, 3)
    valid_points = points[validMask]
    
    lines = lsd(image.mean(2))

    lineImage = image.copy()
    for line in lines:
        cv2.line(lineImage, (int(line[0]), int(line[1])), (int(line[2]), int(line[3])), (0, 0, 255), int(np.ceil(line[4] / 2)))
        continue
    cv2.imwrite('test/lines.png', lineImage)

    numVPs = 3
    VPs, VPLines, remainingLines = calcVanishingPoints(lines, numVPs=numVPs)

    lineImage = image.copy()    
    for VPIndex, lines in enumerate(VPLines):
        for line in lines:
            cv2.line(lineImage, (int(line[0]), int(line[1])), (int(line[2]), int(line[3])), ((VPIndex == 0) * 255, (VPIndex == 1) * 255, (VPIndex == 2) * 255), int(np.ceil(line[4] / 2)))
            continue
        continue
    cv2.imwrite('test/lines_vp.png', lineImage)    

    dominantNormals = np.stack([(VPs[:, 0] * info[16] / width - info[2]) / info[0], np.ones(numVPs), -(VPs[:, 1] * info[17] / height - info[6]) / info[5]], axis=1)
    dominantNormals /= np.maximum(np.linalg.norm(dominantNormals, axis=1, keepdims=True), 1e-4)

    dotThreshold = np.cos(np.deg2rad(20))
    for normalIndex, crossNormals in enumerate([[1, 2], [2, 0], [0, 1]]):
        crossNormal = np.cross(dominantNormals[crossNormals[0]], dominantNormals[crossNormals[1]])
        crossNormal = normalize(crossNormal)
        if np.dot(crossNormal, dominantNormals[normalIndex]) < dotThreshold:
            dominantNormals = np.concatenate([dominantNormals, np.expand_dims(crossNormal, 0)], axis=0)
            pass
        continue

    print(VPs)
    print(dominantNormals)
    
    dominantNormalImage = np.abs(np.matmul(normal, dominantNormals.transpose()))
    cv2.imwrite('test/dominant_normal.png', drawMaskImage(dominantNormalImage))
    
    planeHypothesisAreaThreshold = width * height * 0.01
    
    planes = []
    vpPlaneIndices = []
    if 'offsetGap' in parameters:
        offsetGap = parameters['offsetGap']
    else:
        offsetGap = 0.1
        pass
    planeIndexOffset = 0

    for dominantNormal in dominantNormals:
        if np.linalg.norm(dominantNormal) < 1e-4:
            continue
        offsets = np.tensordot(valid_points, dominantNormal, axes=([1], [0]))

        if 'meanshift' in parameters and parameters['meanshift'] > 0:
            sampleInds = np.arange(offsets.shape[0])
            np.random.shuffle(sampleInds)
            meanshift.fit(np.expand_dims(offsets[sampleInds[:int(offsets.shape[0] * 0.02)]], -1))
            for offset in meanshift.cluster_centers_:
                planes.append(dominantNormal * offset)
                continue
        else:
            offset = offsets.min()
            maxOffset = offsets.max()
            while offset < maxOffset:
                planeMask = np.logical_and(offsets >= offset, offsets < offset + offsetGap)
                segmentOffsets = offsets[np.logical_and(offsets >= offset, offsets < offset + offsetGap)]
                if segmentOffsets.shape[0] < planeHypothesisAreaThreshold:
                    offset += offsetGap
                    continue
                planeD = segmentOffsets.mean()
                planes.append(dominantNormal * planeD)
                offset = planeD + offsetGap

                continue
            pass
        

        vpPlaneIndices.append(np.arange(planeIndexOffset, len(planes)))
        planeIndexOffset = len(planes)
        continue

    if len(planes) == 0:
        return np.array([]), np.zeros(depth.shape).astype(np.int32)
    planes = np.array(planes)

    
    
    planesD = np.linalg.norm(planes, axis=1, keepdims=True)
    planeNormals = planes / np.maximum(planesD, 1e-4)

    if 'distanceCostThreshold' in parameters:
        distanceCostThreshold = parameters['distanceCostThreshold']
    else:
        distanceCostThreshold = 0.05
        pass


    distanceCost = np.abs(np.tensordot(points, planeNormals, axes=([1, 1])) - np.reshape(planesD, [1, -1])) / distanceCostThreshold

    normalCostThreshold = 1 - np.cos(np.deg2rad(30))        
    normalCost = (1 - np.abs(np.tensordot(normals, planeNormals, axes=([1, 1])))) / normalCostThreshold

    if 'normalWeight' in parameters:
        normalWeight = parameters['normalWeight']
    else:
        normalWeight = 1
        pass
    
    unaryCost = distanceCost + normalCost * normalWeight
    unaryCost *= np.expand_dims(validMask.astype(np.float32), -1)    
    unaries = unaryCost.reshape((width * height, -1))
    
    
    print('number of planes ', planes.shape[0])
    cv2.imwrite('test/distance_cost.png', drawSegmentationImage(-distanceCost.reshape((height, width, -1)), unaryCost.shape[-1] - 1))

    cv2.imwrite('test/normal_cost.png', drawSegmentationImage(-normalCost.reshape((height, width, -1)), unaryCost.shape[-1] - 1))

    cv2.imwrite('test/unary_cost.png', drawSegmentationImage(-unaryCost.reshape((height, width, -1)), blackIndex=unaryCost.shape[-1] - 1))

    cv2.imwrite('test/segmentation.png', drawSegmentationImage(-unaries.reshape((height, width, -1)), blackIndex=unaries.shape[-1]))
    

    if 'numProposals' in parameters:
        numProposals = parameters['numProposals']
    else:
        numProposals = 3
        pass

    numProposals = min(numProposals, unaries.shape[-1] - 1)
    
    proposals = np.argpartition(unaries, numProposals)[:, :numProposals]
    unaries = -readProposalInfo(unaries, proposals).reshape((-1, numProposals))
    
    nodes = np.arange(height * width).reshape((height, width))

    deltas = [(0, 1), (1, 0)]
    
    edges = []
    edges_features = []
            
                
    for delta in deltas:
        deltaX = delta[0]
        deltaY = delta[1]
        partial_nodes = nodes[max(-deltaY, 0):min(height - deltaY, height), max(-deltaX, 0):min(width - deltaX, width)].reshape(-1)
        edges.append(np.stack([partial_nodes, partial_nodes + (deltaY * width + deltaX)], axis=1))

        labelDiff = (np.expand_dims(proposals[partial_nodes], -1) != np.expand_dims(proposals[partial_nodes + (deltaY * width + deltaX)], 1)).astype(np.float32)

        
        edges_features.append(labelDiff)
        continue

    edges = np.concatenate(edges, axis=0)
    edges_features = np.concatenate(edges_features, axis=0)


    if 'edgeWeights' in parameters:
        edgeWeights = parameters['edgeWeights']
    else:
        edgeWeights = [0.5, 0.6, 0.6]
        pass    
    
    lineSets = np.zeros((height * width, 3))
    creaseLines = np.expand_dims(np.stack([planeNormals[:, 0] / info[0], planeNormals[:, 1], -planeNormals[:, 2] / info[5]], axis=1), 1) * planesD.reshape((1, -1, 1))
    creaseLines = creaseLines - np.transpose(creaseLines, [1, 0, 2])    
    for planeIndex_1 in range(planes.shape[0]):
        for planeIndex_2 in range(planeIndex_1 + 1, planes.shape[0]):
            creaseLine = creaseLines[planeIndex_1, planeIndex_2]
            if abs(creaseLine[0]) > abs(creaseLine[2]):
                vs = np.arange(height)
                us = -(creaseLine[1] + (vs - info[6]) * creaseLine[2]) / creaseLine[0] + info[2]
                minUs = np.floor(us).astype(np.int32)
                maxUs = minUs + 1
                validIndicesMask = np.logical_and(minUs >= 0, maxUs < width)
                if validIndicesMask.sum() == 0:
                    continue
                vs = vs[validIndicesMask]
                minUs = minUs[validIndicesMask]
                maxUs = maxUs[validIndicesMask]
                edgeIndices = (height - 1) * width + (vs * (width - 1) + minUs)
                for index, edgeIndex in enumerate(edgeIndices):
                    pixel_1 = vs[index] * width + minUs[index]
                    pixel_2 = vs[index] * width + maxUs[index]
                    proposals_1 = proposals[pixel_1]
                    proposals_2 = proposals[pixel_2]                    
                    if planeIndex_1 in proposals_1 and planeIndex_2 in proposals_2:
                        proposalIndex_1 = np.where(proposals_1 == planeIndex_1)[0][0]
                        proposalIndex_2 = np.where(proposals_2 == planeIndex_2)[0][0]
                        edges_features[edgeIndex, proposalIndex_1, proposalIndex_2] *= edgeWeights[0]
                        pass
                    if planeIndex_2 in proposals_1 and planeIndex_1 in proposals_2:
                        proposalIndex_1 = np.where(proposals_1 == planeIndex_2)[0][0]
                        proposalIndex_2 = np.where(proposals_2 == planeIndex_1)[0][0]
                        edges_features[edgeIndex, proposalIndex_1, proposalIndex_2] *= edgeWeights[0]
                        pass
                    continue

                lineSets[vs * width + minUs, 0] = 1
                lineSets[vs * width + maxUs, 0] = 1
            else:
                us = np.arange(width)
                vs = -(creaseLine[1] + (us - info[2]) * creaseLine[0]) / creaseLine[2] + info[6]
                minVs = np.floor(vs).astype(np.int32)
                maxVs = minVs + 1
                validIndicesMask = np.logical_and(minVs >= 0, maxVs < height)
                if validIndicesMask.sum() == 0:
                    continue                
                us = us[validIndicesMask]
                minVs = minVs[validIndicesMask]
                maxVs = maxVs[validIndicesMask]                
                edgeIndices = (minVs * width + us)
                for index, edgeIndex in enumerate(edgeIndices):
                    pixel_1 = minVs[index] * width + us[index]
                    pixel_2 = maxVs[index] * width + us[index]
                    proposals_1 = proposals[pixel_1]
                    proposals_2 = proposals[pixel_2]                    
                    if planeIndex_1 in proposals_1 and planeIndex_2 in proposals_2:
                        proposalIndex_1 = np.where(proposals_1 == planeIndex_1)[0][0]
                        proposalIndex_2 = np.where(proposals_2 == planeIndex_2)[0][0]
                        edges_features[edgeIndex, proposalIndex_1, proposalIndex_2] *= edgeWeights[0]
                        pass
                    if planeIndex_2 in proposals_1 and planeIndex_1 in proposals_2:
                        proposalIndex_1 = np.where(proposals_1 == planeIndex_2)[0][0]
                        proposalIndex_2 = np.where(proposals_2 == planeIndex_1)[0][0]
                        edges_features[edgeIndex, proposalIndex_1, proposalIndex_2] *= edgeWeights[0]
                        pass
                    continue
                lineSets[minVs * width + us, 0] = 1
                lineSets[maxVs * width + us, 0] = 1                
                pass
            continue
        continue

    planeDepths = calcPlaneDepths(planes, width, height, np.array([info[0], info[5], info[2], info[6], info[16], info[17], 0, 0, 0, 0])).reshape((height * width, -1))
    planeDepths = readProposalInfo(planeDepths, proposals).reshape((-1, numProposals))

    planeHorizontalVPMask = np.ones((planes.shape[0], 3), dtype=bool)
    for VPIndex, planeIndices in enumerate(vpPlaneIndices):
        planeHorizontalVPMask[planeIndices] = False
        continue

    
    for VPIndex, lines in enumerate(VPLines):
        lp = lines[:, :2]
        ln = lines[:, 2:4] - lines[:, :2]
        ln /= np.maximum(np.linalg.norm(ln, axis=-1, keepdims=True), 1e-4)
        ln = np.stack([ln[:, 1], -ln[:, 0]], axis=1)
        lnp = (ln * lp).sum(1, keepdims=True)
        occlusionLines = np.concatenate([ln, lnp], axis=1)
        for occlusionLine in occlusionLines:
            if abs(occlusionLine[0]) > abs(occlusionLine[1]):
                vs = np.arange(height)
                us = (occlusionLine[2] - vs * occlusionLine[1]) / occlusionLine[0]
                minUs = np.floor(us).astype(np.int32)
                maxUs = minUs + 1
                validIndicesMask = np.logical_and(minUs >= 0, maxUs < width)
                vs = vs[validIndicesMask]
                minUs = minUs[validIndicesMask]
                maxUs = maxUs[validIndicesMask]                
                edgeIndices = (height - 1) * width + (vs * (width - 1) + minUs)
                for index, edgeIndex in enumerate(edgeIndices):
                    pixel_1 = vs[index] * width + minUs[index]
                    pixel_2 = vs[index] * width + maxUs[index]
                    proposals_1 = proposals[pixel_1]
                    proposals_2 = proposals[pixel_2]                    
                    for proposalIndex_1, planeIndex_1 in enumerate(proposals_1):
                        if not planeHorizontalVPMask[planeIndex_1][VPIndex]:
                            continue
                        planeDepth_1 = planeDepths[pixel_1][proposalIndex_1]
                        for proposalIndex_2, planeIndex_2 in enumerate(proposals_2):
                            if planeDepths[pixel_2][proposalIndex_2] > planeDepth_1:
                                edges_features[edgeIndex, proposalIndex_1, proposalIndex_2] *= edgeWeights[1]
                                pass
                            continue
                        continue
                    continue
                lineSets[vs * width + minUs, 1] = 1
                lineSets[vs * width + maxUs, 1] = 1
            else:
                us = np.arange(width)
                vs = (occlusionLine[2] - us * occlusionLine[0]) / occlusionLine[1]
                
                minVs = np.floor(vs).astype(np.int32)
                maxVs = minVs + 1
                validIndicesMask = np.logical_and(minVs >= 0, maxVs < height)
                us = us[validIndicesMask]
                minVs = minVs[validIndicesMask]
                maxVs = maxVs[validIndicesMask]                
                edgeIndices = (minVs * width + us)
                for index, edgeIndex in enumerate(edgeIndices):
                    pixel_1 = minVs[index] * width + us[index]
                    pixel_2 = maxVs[index] * width + us[index]
                    proposals_1 = proposals[pixel_1]
                    proposals_2 = proposals[pixel_2]                    
                    for proposalIndex_1, planeIndex_1 in enumerate(proposals_1):
                        if not planeHorizontalVPMask[planeIndex_1][VPIndex]:
                            continue
                        planeDepth_1 = planeDepths[pixel_1][proposalIndex_1]
                        for proposalIndex_2, planeIndex_2 in enumerate(proposals_2):
                            if planeDepths[pixel_2][proposalIndex_2] > planeDepth_1:
                                edges_features[edgeIndex, proposalIndex_1, proposalIndex_2] *= edgeWeights[1]
                                pass
                            continue
                        continue
                    continue
                lineSets[minVs * width + us, 1] = 1
                lineSets[maxVs * width + us, 1] = 1                
                pass
            continue
        continue

    for line in remainingLines:
        if abs(line[3] - line[1]) > abs(line[2] - line[0]):
            if line[3] < line[1]:
                line = np.array([line[2], line[3], line[0], line[1]])
                pass
            vs = np.arange(line[1], line[3] + 1, dtype=np.int32)
            us = line[0] + (vs - line[1]) / (line[3] - line[1]) * (line[2] - line[0])
            minUs = np.floor(us).astype(np.int32)
            maxUs = minUs + 1
            validIndicesMask = np.logical_and(minUs >= 0, maxUs < width)
            vs = vs[validIndicesMask]
            minUs = minUs[validIndicesMask]
            maxUs = maxUs[validIndicesMask]                
            edgeIndices = (height - 1) * width + (vs * (width - 1) + minUs)
            for edgeIndex in edgeIndices:
                edges_features[edgeIndex] *= edgeWeights[2]
                continue
            lineSets[(vs * width + minUs), 2] = 1
            lineSets[(vs * width + maxUs), 2] = 1            
        else:
            if line[2] < line[0]:
                line = np.array([line[2], line[3], line[0], line[1]])
                pass
            us = np.arange(line[0], line[2] + 1, dtype=np.int32)
            vs = line[1] + (us - line[0]) / (line[2] - line[0]) * (line[3] - line[1])
            
            minVs = np.floor(vs).astype(np.int32)
            maxVs = minVs + 1
            validIndicesMask = np.logical_and(minVs >= 0, maxVs < height)
            us = us[validIndicesMask]
            minVs = minVs[validIndicesMask]
            maxVs = maxVs[validIndicesMask]
            edgeIndices = (minVs * width + us)
            for edgeIndex in edgeIndices:
                edges_features[edgeIndex] *= edgeWeights[2]
                continue
            lineSets[minVs * width + us, 2] = 1
            lineSets[maxVs * width + us, 2] = 1
            continue
        continue
    cv2.imwrite('test/line_sets.png', drawMaskImage(lineSets.reshape((height, width, 3))))
    

    if 'smoothnessWeight' in parameters:
        smoothnessWeight = parameters['smoothnessWeight']
    else:
        smoothnessWeight = 4
        pass

    print('start')
    refined_segmentation = inference_ogm(unaries, -edges_features * smoothnessWeight, edges, return_energy=False, alg='trw')
    print('done')
    refined_segmentation = refined_segmentation.reshape([height, width, 1])    
    refined_segmentation = readProposalInfo(proposals, refined_segmentation)
    planeSegmentation = refined_segmentation.reshape([height, width])

    planeSegmentation[np.logical_not(validMask.reshape((height, width)))] = planes.shape[0]    
    cv2.imwrite('test/segmentation_refined.png', drawSegmentationImage(planeSegmentation))
    
    return planes, planeSegmentation
Example #54
0
def find_characters(vocab_filename, training_feats, train_labels, test_feats):

    window = 64
    f = open('datasets/ImageSets/val.txt')
    wa = open('svm_test/waldo.txt', 'w+')
    we = open('svm_test/wenda.txt', 'w+')
    wi = open('svm_test/wizard.txt', 'w+')

    image_id = f.readline().rstrip()
    while image_id:
        print(image_id)
        print("processing")
        image = np.asarray(
            plt.imread('datasets/JPEGImages/' + image_id + '.jpg'))
        H, W, chan = image.shape
        img_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        test_feats = []

        orb = cv2.ORB_create()
        #         orb = cv2.ORB_create(nfeatures=1000, scoreType=cv2.ORB_FAST_SCORE)
        kp, des = orb.detectAndCompute(img_gray, None)

        # #         minHessian = 400
        # #         detector = cv2.xfeatures2d_SURF.create(hessianThreshold=minHessian)
        # #         kp = detector.detect(img_gray)

        #             fast = cv2.FastFeatureDetector_create()
        #         # find and draw the keypoints
        #         kp = fast.detect(img_gray,None)
        #         img_kp = cv2.drawKeypoints(img_gray, kp, None, color=(0,0,255), flags=cv2.DrawMatchesFlags_DEFAULT)

        for idx in range(len(kp)):
            j, i = kp[idx].pt

            i = int(np.round(i))
            j = int(np.round(j))
            i_end = i + window
            j_end = j + window

            i_end = min(i_end, H - 1)
            j_end = min(j_end, W - 1)

            img = img_gray[i:i_end, j:j_end]
            feats = bags_of_sifts_image(img_gray, vocab_filename)
            test_feats.extend(feats)

        numOfMax = 5
        probability = svm_probability(training_feats, train_labels, test_feats)

        locations = np.argpartition(-probability, numOfMax, axis=0)[:numOfMax]

        for k in range(len(locations[0])):
            for l in range(numOfMax):

                y, x = kp[locations[l][k]].pt

                x = int(np.round(x))
                y = int(np.round(y))
                y_end = y + window
                x_end = x + window

                x_end = min(x_end, H - 1)
                y_end = min(y_end, W - 1)

                patch = img_gray[x:x_end, y:y_end]

                if (probability[locations[l][k]][k] > 0.4):
                    if k == 0:
                        res = image_id + ' ' + str(probability[
                            locations[l][k]][k]) + ' ' + str(x) + ' ' + str(
                                y) + ' ' + str(x_end) + ' ' + str(y_end) + '\n'
                        wa.write(res)
                    if k == 1:
                        res = image_id + ' ' + str(
                            np.max(probability[locations[l][k]][k])
                        ) + ' ' + str(x) + ' ' + str(y) + ' ' + str(
                            x_end) + ' ' + str(y_end) + '\n'
                        we.write(res)
                    if k == 2:
                        res = image_id + ' ' + str(
                            np.max(probability[locations[l][k]][k])
                        ) + ' ' + str(x) + ' ' + str(y) + ' ' + str(
                            x_end) + ' ' + str(y_end) + '\n'
                        wi.write(res)
        image_id = f.readline().rstrip()
Example #55
0
def lowest_indices(ary, n):
    """Returns the n lowest indices from a numpy array."""
    flat = ary.flatten()
    indices = np.argpartition(flat, n)[:n]
    indices = indices[np.argsort(flat[indices])]
    return np.unravel_index(indices, ary.shape)
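A quick usage sketch of lowest_indices on a toy 2-D array (values made up): the flat indices of the n smallest entries are found with argpartition, ordered by value, and mapped back to (row, col) coordinates.

import numpy as np

ary = np.array([[5.0, 1.0, 4.0],
                [2.0, 6.0, 3.0]])  # toy values
rows, cols = lowest_indices(ary, 3)
print(rows, cols)  # [0 1 1] [1 0 2] -> entries 1.0, 2.0, 3.0 in ascending order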
Example #56
0
def fitPlanesManhattan(image, depth, normal, info, numOutputPlanes=20, imageIndex=-1, parameters={}):
    if 'meanshift' in parameters and parameters['meanshift'] > 0:
        import sklearn.cluster
        meanshift = sklearn.cluster.MeanShift(parameters['meanshift'])
        pass

    
    height = depth.shape[0]
    width = depth.shape[1]

    camera = getCameraFromInfo(info)
    urange = (np.arange(width, dtype=np.float32) / (width) * (camera['width']) - camera['cx']) / camera['fx']
    urange = urange.reshape(1, -1).repeat(height, 0)
    vrange = (np.arange(height, dtype=np.float32) / (height) * (camera['height']) - camera['cy']) / camera['fy']
    vrange = vrange.reshape(-1, 1).repeat(width, 1)
    
    X = depth * urange
    Y = depth
    Z = -depth * vrange


    normals = normal.reshape((-1, 3))
    normals = normals / np.maximum(np.linalg.norm(normals, axis=-1, keepdims=True), 1e-4)

    validMask = np.logical_and(np.linalg.norm(normals, axis=-1) > 1e-4, depth.reshape(-1) > 1e-4)
    
    valid_normals = normals[validMask]

    
    points = np.stack([X, Y, Z], axis=2).reshape(-1, 3)
    valid_points = points[validMask]

    polarAngles = np.arange(16) * np.pi / 2 / 16
    azimuthalAngles = np.arange(64) * np.pi * 2 / 64
    polarAngles = np.expand_dims(polarAngles, -1)
    azimuthalAngles = np.expand_dims(azimuthalAngles, 0)

    normalBins = np.stack([np.sin(polarAngles) * np.cos(azimuthalAngles), np.tile(np.cos(polarAngles), [1, azimuthalAngles.shape[1]]), -np.sin(polarAngles) * np.sin(azimuthalAngles)], axis=2)
    normalBins = np.reshape(normalBins, [-1, 3])
    numBins = normalBins.shape[0]
    
    
    normalDiff = np.tensordot(valid_normals, normalBins, axes=([1], [1]))
    normalDiffSign = np.sign(normalDiff)
    normalDiff = np.maximum(normalDiff, -normalDiff)
    normalMask = one_hot(np.argmax(normalDiff, axis=-1), numBins)
    bins = normalMask.sum(0)
    np.expand_dims(valid_normals, 1) * np.expand_dims(normalMask, -1)

    maxNormals = np.expand_dims(valid_normals, 1) * np.expand_dims(normalMask, -1)
    maxNormals *= np.expand_dims(normalDiffSign, -1)
    averageNormals = maxNormals.sum(0) / np.maximum(np.expand_dims(bins, -1), 1e-4)
    averageNormals /= np.maximum(np.linalg.norm(averageNormals, axis=-1, keepdims=True), 1e-4)
    dominantNormal_1 = averageNormals[np.argmax(bins)]

    dotThreshold_1 = np.cos(np.deg2rad(100))
    dotThreshold_2 = np.cos(np.deg2rad(80))
    
    dot_1 = np.tensordot(normalBins, dominantNormal_1, axes=([1], [0]))
    bins[np.logical_or(dot_1 < dotThreshold_1, dot_1 > dotThreshold_2)] = 0
    dominantNormal_2 = averageNormals[np.argmax(bins)]
    dot_2 = np.tensordot(normalBins, dominantNormal_2, axes=([1], [0]))
    bins[np.logical_or(dot_2 < dotThreshold_1, dot_2 > dotThreshold_2)] = 0
    
    dominantNormal_3 = averageNormals[np.argmax(bins)]


    dominantNormals = np.stack([dominantNormal_1, dominantNormal_2, dominantNormal_3], axis=0)

    dominantNormalImage = np.abs(np.matmul(normal, dominantNormals.transpose()))
    
    planeHypothesisAreaThreshold = width * height * 0.01

    
    planes = []
    
    if 'offsetGap' in parameters:
        offsetGap = parameters['offsetGap']
    else:
        offsetGap = 0.1
        pass
    for dominantNormal in dominantNormals:
        offsets = np.tensordot(valid_points, dominantNormal, axes=([1], [0]))

        if 'meanshift' in parameters and parameters['meanshift'] > 0:
            sampleInds = np.arange(offsets.shape[0])
            np.random.shuffle(sampleInds)
            meanshift.fit(np.expand_dims(offsets[sampleInds[:int(offsets.shape[0] * 0.02)]], -1))
            for offset in meanshift.cluster_centers_:
                planes.append(dominantNormal * offset)
                continue
            
        offset = offsets.min()
        maxOffset = offsets.max()
        while offset < maxOffset:
            planeMask = np.logical_and(offsets >= offset, offsets < offset + offsetGap)
            segmentOffsets = offsets[np.logical_and(offsets >= offset, offsets < offset + offsetGap)]
            if segmentOffsets.shape[0] < planeHypothesisAreaThreshold:
                offset += offsetGap
                continue
            planeD = segmentOffsets.mean()
            planes.append(dominantNormal * planeD)
            offset = planeD + offsetGap
            continue
        continue
    
    if len(planes) == 0:
        return np.array([]), np.zeros(depth.shape).astype(np.int32)
    
    planes = np.array(planes)
    print('number of planes ', planes.shape[0])

    vanishingPoints = np.stack([dominantNormals[:, 0] / np.maximum(dominantNormals[:, 1], 1e-4) * info[0] + info[2], -dominantNormals[:, 2] / np.maximum(dominantNormals[:, 1], 1e-4) * info[5] + info[6]], axis=1)
    vanishingPoints[:, 0] *= width / info[16]
    vanishingPoints[:, 1] *= height / info[17]

    indices = np.arange(width * height, dtype=np.int32)
    uv = np.stack([indices % width, indices // width], axis=1)
    colors = image.reshape((-1, 3))
    windowW = 9
    windowH = 3
    dominantLineMaps = []
    for vanishingPointIndex, vanishingPoint in enumerate(vanishingPoints):
        horizontalDirection = uv - np.expand_dims(vanishingPoint, 0)
        horizontalDirection = horizontalDirection / np.maximum(np.linalg.norm(horizontalDirection, axis=1, keepdims=True), 1e-4)
        verticalDirection = np.stack([horizontalDirection[:, 1], -horizontalDirection[:, 0]], axis=1)

        colorDiffs = []
        for directionIndex, direction in enumerate([horizontalDirection, verticalDirection]):
            neighbors = uv + direction
            neighborsX = neighbors[:, 0]
            neighborsY = neighbors[:, 1]
            neighborsMinX = np.maximum(np.minimum(np.floor(neighborsX).astype(np.int32), width - 1), 0)
            neighborsMaxX = np.maximum(np.minimum(np.ceil(neighborsX).astype(np.int32), width - 1), 0)
            neighborsMinY = np.maximum(np.minimum(np.floor(neighborsY).astype(np.int32), height - 1), 0)
            neighborsMaxY = np.maximum(np.minimum(np.ceil(neighborsY).astype(np.int32), height - 1), 0)
            indices_1 = neighborsMinY * width + neighborsMinX
            indices_2 = neighborsMaxY * width + neighborsMinX
            indices_3 = neighborsMinY * width + neighborsMaxX            
            indices_4 = neighborsMaxY * width + neighborsMaxX
            areas_1 = (neighborsMaxX - neighborsX) * (neighborsMaxY - neighborsY)
            areas_2 = (neighborsMaxX - neighborsX) * (neighborsY - neighborsMinY)
            areas_3 = (neighborsX - neighborsMinX) * (neighborsMaxY - neighborsY)
            areas_4 = (neighborsX - neighborsMinX) * (neighborsY - neighborsMinY)

            neighborsColor = colors[indices_1] * np.expand_dims(areas_1, -1) + colors[indices_2] * np.expand_dims(areas_2, -1) + colors[indices_3] * np.expand_dims(areas_3, -1) + colors[indices_4] * np.expand_dims(areas_4, -1)
            colorDiff = np.linalg.norm(neighborsColor - colors, axis=-1)

            colorDiffs.append(colorDiff)
            continue
        colorDiffs = np.stack(colorDiffs, 1)

        deltaUs, deltaVs = np.meshgrid(np.arange(windowW) - (windowW - 1) / 2, np.arange(windowH) - (windowH - 1) / 2)
        deltas = deltaUs.reshape((1, -1, 1)) * np.expand_dims(horizontalDirection, axis=1) + deltaVs.reshape((1, -1, 1)) * np.expand_dims(verticalDirection, axis=1)
        
        windowIndices = np.expand_dims(uv, 1) - deltas
        windowIndices = (np.minimum(np.maximum(np.round(windowIndices[:, :, 1]), 0), height - 1) * width + np.minimum(np.maximum(np.round(windowIndices[:, :, 0]), 0), width - 1)).astype(np.int32)
        
        dominantLineMap = []

        for pixels in windowIndices:
            gradientSums = colorDiffs[pixels].sum(0)
            dominantLineMap.append(gradientSums[1] / max(gradientSums[0], 1e-4))
            continue
        dominantLineMaps.append(np.array(dominantLineMap).reshape((height, width)))
        continue
    dominantLineMaps = np.stack(dominantLineMaps, axis=2)
    if 'dominantLineThreshold' in parameters:
        dominantLineThreshold = parameters['dominantLineThreshold']
    else:
        dominantLineThreshold = 3
        pass

    smoothnessWeightMask = dominantLineMaps.max(2) > dominantLineThreshold
    
    planesD = np.linalg.norm(planes, axis=1, keepdims=True)
    planeNormals = planes / np.maximum(planesD, 1e-4)


    if 'distanceCostThreshold' in parameters:
        distanceCostThreshold = parameters['distanceCostThreshold']
    else:
        distanceCostThreshold = 0.05
        pass
    
    distanceCost = np.abs(np.tensordot(points, planeNormals, axes=([1, 1])) - np.reshape(planesD, [1, -1])) / distanceCostThreshold

    normalCost = 0
    normalCostThreshold = 1 - np.cos(np.deg2rad(30))        
    normalCost = (1 - np.abs(np.tensordot(normals, planeNormals, axes=([1, 1])))) / normalCostThreshold
    
    unaryCost = distanceCost + normalCost
    unaryCost *= np.expand_dims(validMask.astype(np.float32), -1)
    unaries = unaryCost.reshape((width * height, -1))

    if False:
        cv2.imwrite('test/dominant_normal.png', drawMaskImage(dominantNormalImage))
        
        if imageIndex >= 0:
            cv2.imwrite('test/' + str(imageIndex) + '_dominant_lines.png', drawMaskImage(dominantLineMaps / dominantLineThreshold))
        else:
            cv2.imwrite('test/dominant_lines.png', drawMaskImage(dominantLineMaps / dominantLineThreshold))
            pass
        cv2.imwrite('test/dominant_lines_mask.png', drawMaskImage(smoothnessWeightMask))            
        cv2.imwrite('test/distance_cost.png', drawSegmentationImage(-distanceCost.reshape((height, width, -1)), unaryCost.shape[-1] - 1))
        cv2.imwrite('test/normal_cost.png', drawSegmentationImage(-normalCost.reshape((height, width, -1)), unaryCost.shape[-1] - 1))
        cv2.imwrite('test/unary_cost.png', drawSegmentationImage(-unaryCost.reshape((height, width, -1)), blackIndex=unaryCost.shape[-1] - 1))
        cv2.imwrite('test/segmentation.png', drawSegmentationImage(-unaries.reshape((height, width, -1)), blackIndex=unaries.shape[-1]))
        pass

    if 'numProposals' in parameters:
        numProposals = parameters['numProposals']
    else:
        numProposals = 3
        pass
    numProposals = min(numProposals, unaries.shape[-1] - 1)
    proposals = np.argpartition(unaries, numProposals)[:, :numProposals]
    proposals[np.logical_not(validMask)] = 0
    
    unaries = -readProposalInfo(unaries, proposals).reshape((-1, numProposals))
    
    nodes = np.arange(height * width).reshape((height, width))

    deltas = [(0, 1), (1, 0)]
    
    edges = []
    edges_features = []
    smoothnessWeights = 1 - 0.99 * smoothnessWeightMask.astype(np.float32)
    
    for delta in deltas:
        deltaX = delta[0]
        deltaY = delta[1]
        partial_nodes = nodes[max(-deltaY, 0):min(height - deltaY, height), max(-deltaX, 0):min(width - deltaX, width)].reshape(-1)
        edges.append(np.stack([partial_nodes, partial_nodes + (deltaY * width + deltaX)], axis=1))

        labelDiff = (np.expand_dims(proposals[partial_nodes], -1) != np.expand_dims(proposals[partial_nodes + (deltaY * width + deltaX)], 1)).astype(np.float32)
        edges_features.append(labelDiff * smoothnessWeights.reshape((width * height, -1))[partial_nodes].reshape(-1, 1, 1))
        continue

    edges = np.concatenate(edges, axis=0)
    edges_features = np.concatenate(edges_features, axis=0)


    if 'smoothnessWeight' in parameters:
        smoothnessWeight = parameters['smoothnessWeight']
    else:
        smoothnessWeight = 40
        pass

    print('start')
    refined_segmentation = inference_ogm(unaries, -edges_features * smoothnessWeight, edges, return_energy=False, alg='trw')
    print('done')
    
    refined_segmentation = refined_segmentation.reshape([height, width, 1])
    refined_segmentation = readProposalInfo(proposals, refined_segmentation)
    planeSegmentation = refined_segmentation.reshape([height, width])

    planeSegmentation[np.logical_not(validMask.reshape((height, width)))] = planes.shape[0]

    cv2.imwrite('test/segmentation_refined.png', drawSegmentationImage(planeSegmentation))
    
    return planes, planeSegmentation
with tf.Session() as sess:
    saver.restore(sess, INCEPTION_V4_CHECKPOINT_PATH)
    predictions_val = predictions.eval(feed_dict={X: X_test})

# In[60]:

most_likely_class_index = np.argmax(predictions_val[0])
most_likely_class_index

# In[61]:

class_names[most_likely_class_index]

# In[62]:

top_5 = np.argpartition(predictions_val[0], -5)[-5:]
top_5 = reversed(top_5[np.argsort(predictions_val[0][top_5])])
for i in top_5:
    print("{0}: {1:.2f}%".format(class_names[i], 100 * predictions_val[0][i]))

# # Transfer Learning for Large Image Classification

# **Exercise:** Create a training set containing at least 100 images per class. For example, you could classify your own pictures based on the location (beach, mountain, city, etc.), or alternatively you can just use an existing dataset, such as the flowers dataset or MIT's places dataset (requires registration, and it is huge).

# In[ ]:

import sys
import tarfile
from six.moves import urllib

FLOWERS_URL = "http://download.tensorflow.org/example_images/flower_photos.tgz"
Example #58
0
                filled=True)

(graph, ) = pydot.graph_from_dot_file('forest.dot')
graph.write_png('somefile.png')

svm = SVC(kernel='linear', gamma=1, C=100)
svm.fit(X_train, y_train)

svm_prediction = svm.predict(X_test)

print(accuracy_score(y_test, svm_prediction))

coefs = np.ravel(svm.coef_)

k = 10
test = np.argpartition(coefs, len(coefs) - k)[-k:]

test = np.ravel(np.flip(test))

values = x.columns.values

forest = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=0)

forest.fit(X_train, y_train)

forest_prediction = forest.predict(X_test)

print(accuracy_score(y_test, forest_prediction))

result = forest.feature_importances_
Example #59
0
def re_ranking(q_g_dist, q_q_dist, g_g_dist, k1=20, k2=6, lambda_value=0.3):
    # The following naming, e.g. gallery_num, differs from the outer scope;
    # it can safely be ignored.
    original_dist = np.concatenate([
        np.concatenate([q_q_dist, q_g_dist], axis=1),
        np.concatenate([q_g_dist.T, g_g_dist], axis=1)
    ],
                                   axis=0)
    original_dist = 2. - 2 * original_dist  # convert the cosine similarity into a Euclidean distance metric
    original_dist = np.power(original_dist, 2).astype(np.float32)
    original_dist = np.transpose(1. * original_dist /
                                 np.max(original_dist, axis=0))
    V = np.zeros_like(original_dist).astype(np.float32)
    #initial_rank = np.argsort(original_dist).astype(np.int32)
    # top K1+1
    initial_rank = np.argpartition(original_dist, range(1, k1 + 1))

    query_num = q_g_dist.shape[0]
    all_num = original_dist.shape[0]

    for i in range(all_num):
        # k-reciprocal neighbors
        k_reciprocal_index = k_reciprocal_neigh(initial_rank, i, k1)
        k_reciprocal_expansion_index = k_reciprocal_index
        for j in range(len(k_reciprocal_index)):
            candidate = k_reciprocal_index[j]
            candidate_k_reciprocal_index = k_reciprocal_neigh(
                initial_rank, candidate, int(np.around(k1 / 2)))
            if len(
                    np.intersect1d(candidate_k_reciprocal_index,
                                   k_reciprocal_index)
            ) > 2. / 3 * len(candidate_k_reciprocal_index):
                k_reciprocal_expansion_index = np.append(
                    k_reciprocal_expansion_index, candidate_k_reciprocal_index)

        k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index)
        weight = np.exp(-original_dist[i, k_reciprocal_expansion_index])
        V[i, k_reciprocal_expansion_index] = 1. * weight / np.sum(weight)

    original_dist = original_dist[:query_num, ]
    if k2 != 1:
        V_qe = np.zeros_like(V, dtype=np.float32)
        for i in range(all_num):
            V_qe[i, :] = np.mean(V[initial_rank[i, :k2], :], axis=0)
        V = V_qe
        del V_qe
    del initial_rank
    invIndex = []
    for i in range(all_num):
        invIndex.append(np.where(V[:, i] != 0)[0])

    jaccard_dist = np.zeros_like(original_dist, dtype=np.float32)

    for i in range(query_num):
        temp_min = np.zeros(shape=[1, all_num], dtype=np.float32)
        indNonZero = np.where(V[i, :] != 0)[0]
        indImages = []
        indImages = [invIndex[ind] for ind in indNonZero]
        for j in range(len(indNonZero)):
            temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(
                V[i, indNonZero[j]], V[indImages[j], indNonZero[j]])
        jaccard_dist[i] = 1 - temp_min / (2. - temp_min)

    final_dist = jaccard_dist * (1 -
                                 lambda_value) + original_dist * lambda_value
    del original_dist
    del V
    del jaccard_dist
    final_dist = final_dist[:query_num, query_num:]
    return final_dist
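One detail worth isolating from the function above: when np.argpartition receives a sequence of kth values (here range(1, k1 + 1)), all of those positions end up in sorted order, so initial_rank[:, :k1 + 1] is already ordered by distance without a full argsort. A toy sketch (values made up):

import numpy as np

dist = np.array([[0.0, 0.7, 0.2, 0.9, 0.4],
                 [0.7, 0.0, 0.5, 0.3, 0.8]])  # toy distance matrix
k1 = 3
initial_rank = np.argpartition(dist, range(1, k1 + 1))
print(initial_rank[:, :k1 + 1])  # [[0 2 4 1] [1 3 2 0]]  (each row sorted by distance)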
Example #60
0
def cL(s,x):
    '''returns n-s abs-smallest indices of vector x'''
    ns = len(x)-s
    return np.argpartition(abs(x),ns)[:ns]
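A short usage sketch of cL on a toy coefficient vector (values made up): with s=2 it returns the indices of the len(x)-s abs-smallest entries, i.e. exactly the support that a hard-thresholding step would set to zero while keeping the s largest-magnitude coefficients.

import numpy as np

x = np.array([0.1, -3.0, 0.5, 2.0, -0.2])  # toy coefficients
s = 2
idx = cL(s, x)
print(np.sort(idx))  # [0 2 4] -> the 3 abs-smallest entries

x_thresholded = x.copy()
x_thresholded[idx] = 0.0  # keep only the s largest-magnitude coefficients
print(x_thresholded)      # [ 0. -3.  0.  2.  0.]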