Example #1
import numpy as np
import bottleneck as bn
from numpy import prod

def getML(S, N=1, testmode=False):
    """Return M, the product of the N largest values of S, and L, the product of its last N values."""
    if testmode:
        reglast = S[-N:]
        regmax = -bn.partsort(-np.asarray(S, int), N)[:N]
        M, L = prod(regmax), prod(reglast)
        print("reglast, regmax : ", reglast, regmax)
    else:
        L = prod(S[-N:])
        M = prod(-bn.partsort(-np.asarray(S, int), N)[:N])
    return M, L
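A quick sanity check of what getML returns, assuming the imports shown with the function and some hypothetical input data:

S = [3, 1, 4, 1, 5, 9, 2]
M, L = getML(S, N=2)
# The two largest values are 9 and 5, so M == 45;
# the last two values are 9 and 2, so L == 18.
print(M, L)  # 45 18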
Example #2
def mean_rrank_at_k_batch(train_data,
                          heldout_data,
                          Et,
                          Eb,
                          user_idx,
                          k=5,
                          mu=None,
                          vad_data=None):
    '''
    Mean reciprocal rank@k: for each user, make predictions and rank all of
    the items, then average the reciprocal ranks of the k best-ranked items
    that are in the held-out set.
    '''
    batch_users = user_idx.stop - user_idx.start

    X_pred = _make_prediction(train_data,
                              Et,
                              Eb,
                              user_idx,
                              batch_users,
                              mu=mu,
                              vad_data=vad_data)
    all_rrank = 1. / (np.argsort(np.argsort(-X_pred, axis=1), axis=1) + 1)
    X_true_binary = (heldout_data[user_idx] > 0).toarray()

    heldout_rrank = X_true_binary * all_rrank
    top_k = bn.partsort(-heldout_rrank, k, axis=1)
    return -top_k[:, :k].mean(axis=1)
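The ranking trick here is easy to check in isolation: a double argsort turns scores into 1-based ranks, and bn.partsort on the negated held-out reciprocal ranks pulls out the k largest per row. Below is a minimal, self-contained sketch with toy data (not the author's evaluation pipeline; _make_prediction is replaced by a hard-coded score matrix):

import numpy as np
import bottleneck as bn  # newer bottleneck releases expose this as bn.partition

# Toy scores for 2 users over 4 items.
X_pred = np.array([[0.9, 0.1, 0.5, 0.3],
                   [0.2, 0.8, 0.4, 0.6]])
ranks = np.argsort(np.argsort(-X_pred, axis=1), axis=1) + 1  # 1-based rank per row
all_rrank = 1. / ranks
# Held-out items: 2 and 3 for user 0, item 1 for user 1.
X_true_binary = np.array([[0, 0, 1, 1],
                          [0, 1, 0, 0]], dtype=float)
heldout_rrank = X_true_binary * all_rrank
k = 2
top_k = bn.partsort(-heldout_rrank, k, axis=1)  # k smallest of the negation == k largest rranks
print(-top_k[:, :k].mean(axis=1))               # approx [0.417, 0.5]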
Example #3
    def _phase2(self):
        """
		Execute phase 2 of the SP region. This phase is used to compute the
		active columns.
		
		Note - This should only be called after phase 1 has been called and
		after the inhibition radius and neighborhood have been updated.
		"""

        # Shift the outputs
        self.y[:, 1:] = self.y[:, :-1]
        self.y[:, 0] = 0

        # Calculate k
        #   - For a column to be active its overlap must be above the overlap
        #     value of the k-th largest column in its neighborhood.
        k = self._get_num_cols()

        if self.global_inhibition:
            # The neighborhood is all columns, thus the set of active columns
            # is simply columns that have an overlap above the k-th largest
            # in the entire region

            # Compute the winning column indexes
            if self.learn:
                # Randomly break ties
                ix = bn.argpartsort(
                    -self.overlap[:, 0] -
                    self.prng.uniform(.1, .2, self.ncolumns), k)[:k]
            else:
                # Choose the same set of columns each time
                ix = bn.argpartsort(-self.overlap[:, 0], k)[:k]

            # Set the active columns
            self.y[ix, 0] = self.overlap[ix, 0] > 0
        else:
            # The neighborhood is bounded by the inhibition radius, therefore
            # each column's neighborhood must be considered

            for i in xrange(self.ncolumns):
                # Get the neighbors
                ix = np.where(self.neighbors[i])[0]

                # Compute the minimum top overlap
                if ix.shape[0] <= k:
                    # Desired number of candidates is at or below the desired
                    # activity level, so find the overall max
                    m = max(bn.nanmax(self.overlap[ix, 0]), 1)
                else:
                    # Desired number of candidates is above the desired
                    # activity level, so find the k-th largest
                    m = max(-bn.partsort(-self.overlap[ix, 0], k + 1)[k], 1)

                # Set the column activity
                if self.overlap[i, 0] >= m: self.y[i, 0] = True
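The global-inhibition branch is essentially a top-k selection with bn.argpartsort plus a positivity check. A standalone sketch with made-up overlaps and a fixed RNG, not tied to the SP class:

import numpy as np
import bottleneck as bn  # bn.argpartsort; bn.argpartition in newer releases

prng = np.random.RandomState(42)
overlap = np.array([3., 0., 5., 5., 2., 7.])
k = 3

# Learning mode: subtract a small random value so ties are broken randomly.
ix = bn.argpartsort(-overlap - prng.uniform(.1, .2, overlap.size), k)[:k]
y = np.zeros(overlap.size, dtype=bool)
y[ix] = overlap[ix] > 0   # winners still need a non-zero overlap
print(sorted(ix), y)      # three of the largest-overlap columns become active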
Example #4
	def _phase2(self):
		"""
		Execute phase 2 of the SP region. This phase is used to compute the
		active columns.
		
		Note - This should only be called after phase 1 has been called and
		after the inhibition radius and neighborhood have been updated.
		"""
		
		# Shift the outputs
		self.y[:, 1:] = self.y[:, :-1]
		self.y[:, 0] = 0
		
		# Calculate k
		#   - For a column to be active its overlap must be above the overlap
		#     value of the k-th largest column in its neighborhood.
		k = self._get_num_cols()
		
		if self.global_inhibition:
			# The neighborhood is all columns, thus the set of active columns
			# is simply columns that have an overlap above the k-th largest
			# in the entire region
			
			# Compute the winning column indexes
			if self.learn:				
				# Randomly break ties
				ix = bn.argpartsort(-self.overlap[:, 0] -
					self.prng.uniform(.1, .2, self.ncolumns), k)[:k]
			else:
				# Choose the same set of columns each time
				ix = bn.argpartsort(-self.overlap[:, 0], k)[:k]
			
			# Set the active columns
			self.y[ix, 0] = self.overlap[ix, 0] > 0
		else:
			# The neighborhood is bounded by the inhibition radius, therefore
			# each column's neighborhood must be considered
			
			for i in xrange(self.ncolumns):
				# Get the neighbors
				ix = np.where(self.neighbors[i])[0]
				
				# Compute the minimum top overlap
				if ix.shape[0] <= k:
					# Desired number of candidates is at or below the desired
					# activity level, so find the overall max
					m = max(bn.nanmax(self.overlap[ix, 0]), 1)
				else:
					# Desired number of candidates is above the desired
					# activity level, so find the k-th largest
					m = max(-bn.partsort(-self.overlap[ix, 0], k + 1)[k], 1)
				
				# Set the column activity
				if self.overlap[i, 0] >= m: self.y[i, 0] = True
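In the local-inhibition branch the key expression is -bn.partsort(-x, k + 1)[k], which picks the (k + 1)-th largest overlap in a neighborhood as the activation threshold. A small check against a full sort, using toy data rather than the SP class:

import numpy as np
import bottleneck as bn

overlap = np.array([4., 1., 6., 6., 3., 0., 2.])
k = 3

m = max(-bn.partsort(-overlap, k + 1)[k], 1)   # (k+1)-th largest, clamped to at least 1
assert m == max(np.sort(overlap)[::-1][k], 1)  # same value as a full descending sort
print(m, overlap >= m)                         # columns at or above m would turn on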
Example #5
def mean_rrank_at_k_batch(train_data, heldout_data, Et, Eb,
                          user_idx, k=5, mu=None, vad_data=None):
    '''
    Mean reciprocal rank@k: for each user, make predictions and rank all of
    the items, then average the reciprocal ranks of the k best-ranked items
    that are in the held-out set.
    '''
    batch_users = user_idx.stop - user_idx.start

    X_pred = _make_prediction(train_data, Et, Eb, user_idx,
                              batch_users, mu=mu, vad_data=vad_data)
    all_rrank = 1. / (np.argsort(np.argsort(-X_pred, axis=1), axis=1) + 1)
    X_true_binary = (heldout_data[user_idx] > 0).toarray()

    heldout_rrank = X_true_binary * all_rrank
    top_k = bn.partsort(-heldout_rrank, k, axis=1)
    return -top_k[:, :k].mean(axis=1)
Example #6
def bn_topn(arr, N, ascending=None):
    """
    Return the top N results. Negative N will give N lowest results

    Paramters
    ---------
    arr : Series
        one dimension array
    N : int
        number of elements to return. Negative numbers will return smallest
    ascending : bool
        Ordering of the return values. Default behavior is greatest absolute
        magnitude.

    Note
    ----
    Default ascending order depends on N and whether you are looking for the
    top and bottom results. If you are looking for the top results, the 
    most positive results will come first. If you are looking for the bottom
    results, then the most negative results comes first
    """
    if arr.ndim > 1:
        raise Exception("Only works on ndim=1")
    if ascending is None:
        ascending = not N > 0

    arr = arr[~np.isnan(arr)]
    if N > 0: # nlargest
        N = min(abs(N), len(arr))
        N = len(arr) - abs(N)
        sl = slice(N, None)
    else: # nsmallest
        N = min(abs(N), len(arr))
        sl = slice(None, N)

    if N == 0:
        bn_res = arr
    else:
        out = nb.partsort(arr, N)
        bn_res = out[sl]

    bn_res = np.sort(bn_res) # sort output
    if not ascending:
        bn_res = bn_res[::-1]
    return bn_res
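For reference, a couple of hypothetical calls. The snippet reaches bottleneck through an nb alias, presumably import bottleneck as nb in its own module; NaNs are dropped before selection:

import numpy as np

arr = np.array([3., -7., np.nan, 10., 1., -2.])
bn_topn(arr, 2)    # two largest, most positive first -> [10., 3.]
bn_topn(arr, -2)   # two smallest, most negative first -> [-7., -2.]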
Example #7
def test_transpose():
    "partsort transpose test"
    a = np.arange(12).reshape(4, 3)
    actual = bn.partsort(a.T, 2, -1).T
    desired = bn.slow.partsort(a.T, 2, -1).T
    assert_equal(actual, desired, 'partsort transpose test')
Example #9
def n_smallest(arr, n):
    fin = np_ravel(arr)
    fin = partsort(fin, n)
    return fin[:n]
Example #10
def n_largest(arr, n):
    fin = np_ravel(arr)
    fin = 255 - fin
    fin = partsort(fin, n)
    fin = 255 - fin
    return fin[:n]
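Both helpers assume from numpy import ravel as np_ravel and from bottleneck import partsort; n_largest sidesteps negating unsigned integers by reflecting 8-bit values around 255. A hypothetical run on a tiny uint8 image:

import numpy as np

img = np.array([[10, 250], [3, 77]], dtype=np.uint8)
n_smallest(img, 2)   # the two darkest pixel values, e.g. [3, 10]
# 255 - x reverses the ordering, partsort keeps the n smallest reflected
# values, and 255 - fin maps them back to the n largest originals.
n_largest(img, 2)    # the two brightest pixel values, e.g. [250, 77]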
Example #11
def random_epstein_exact_weird_correction(values_input, nb_to_keep):
    nb_to_remove = len(values_input) - nb_to_keep
    k = nb_to_remove

    values = values_input[:]
    S = set(range(len(values)))
    n = len(S)
    indice_from_which_to_start = len(values)
    Aleft = weighted_average_weird_correction(values)

    Aright = float('inf')

    while len(S) > 1:
        sampled = sample(S, 1)[0]
        vi = values[sampled][0]
        wi = values[sampled][1]

        X, Y, Z, E = COMPUTE_X_Y_Z_E(S, values, Aleft, Aright, vi, wi)

        while True:
            if len(Z) > 0:

                A = median([(vi - values[j][0]) / (wi - values[j][1])
                            for j in Z])
                l2 = partsort([A * values[j][1] - values[j][0] for j in S],
                              len(S) - k)[:len(S) - k]
                F_A = -sum(l2)

                if F_A == 0:

                    return A

                elif F_A > 0:
                    Aleft = A
                else:
                    Aright = A
                X, Y, Z = UPDATE_X_Y_Z(S, values, Aleft, Aright, vi, wi,
                                       X, Y, Z)

            if ((len(X) + len(E)) >= (len(S) - k)) and k > 0:
                nb_to_remove = min(len(E), len(X) + len(E) - (len(S) - k))
                to_remove_E = set(sample(E, nb_to_remove))
                S = S - to_remove_E
                E = E - to_remove_E
                S = S - Y
                k = k - (len(Y) + nb_to_remove)
                Y = set()

            elif (len(Y) + len(E)) >= k:
                nb_to_collapse = min(len(E), len(Y) + len(E) - k)
                values_to_collapse_E = set(sample(E, nb_to_collapse))
                E = E - values_to_collapse_E
                values_to_collapse = values_to_collapse_E | X
                S = S - values_to_collapse

                collapsed_v = 0.
                collapsed_w = 1.

                for x in values_to_collapse:
                    vx, wx = values[x]
                    collapsed_v += vx
                    collapsed_w += wx

                collapsed = (collapsed_v, collapsed_w)
                values.append(collapsed)
                X = {indice_from_which_to_start}
                S.add(indice_from_which_to_start)
                indice_from_which_to_start += 1

            if len(Z) <= len(S) / 32.:
                break

    spop = S.pop()

    return values[spop][0] / values[spop][1]
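The role of partsort here is the F(A) test: for a candidate ratio A, sum the len(S) - k smallest values of A*w_j - v_j and use the sign to tighten the bracket [Aleft, Aright]. A self-contained sketch of just that step with made-up (v, w) pairs; the COMPUTE_/UPDATE_ helpers and the weird correction from the original module are not needed for it:

from bottleneck import partsort

values = [(3., 1.), (8., 2.), (1., 1.), (10., 4.)]  # (v_j, w_j) pairs
S = set(range(len(values)))
k = 1                      # how many items the algorithm may drop
A = 2.5                    # candidate value of the ratio

costs = [A * values[j][1] - values[j][0] for j in S]
l2 = partsort(costs, len(S) - k)[:len(S) - k]       # the len(S) - k smallest costs
F_A = -sum(l2)
print(F_A)   # 3.5 > 0, so the loop above would raise Aleft to A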