Example #1
def generate_coasting_beam(Beam, t_start, t_stop, spread = 1E-3, spread_type = 'dp/p', energy_offset = 0, distribution = 'gaussian'
				, user_distribution = None, user_probability = None):

	if spread_type == 'dp/p':
		energy_spread = Beam.energy * Beam.beta**2 * spread
	elif spread_type == 'dE/E':
		energy_spread = spread*Beam.energy
	elif spread_type == 'dp':
		energy_spread = Beam.energy * Beam.beta**2 * spread / Beam.momentum
	elif spread_type == 'dE':
		energy_spread = spread
	else:
                #DistributionError
		raise RuntimeError("spread_type not recognised")


	if distribution == 'gaussian':
		Beam.dE = rand.normal(loc = energy_offset, scale = energy_spread, size = Beam.n_macroparticles)

	elif distribution == 'parabolic':
		energyRange = np.linspace(-energy_spread, energy_spread, 10000)
		probabilityDistribution = 1 - (energyRange/energy_spread)**2
		probabilityDistribution /= np.cumsum(probabilityDistribution)[-1]
		Beam.dE = rand.choice(energyRange, size = Beam.n_macroparticles, p = probabilityDistribution) + (rand.rand(Beam.n_macroparticles) - 0.5) * (energyRange[1] - energyRange[0]) + energy_offset

	#If distribution == 'user' is selected the user must supply a uniformly spaced distribution and the associated probability for each bin
	#spread and energy_offset are not used in this instance.
	elif distribution == 'user':
		Beam.dE = rand.choice(user_distribution, size = Beam.n_macroparticles, p = user_probability) + (rand.rand(Beam.n_macroparticles) - 0.5) * (user_distribution[1] - user_distribution[0])

	else:
                #DistributionError
		raise RuntimeError("distribution type not recognised")

	Beam.dt = rand.rand(Beam.n_macroparticles)*(t_stop - t_start) + t_start
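
A minimal usage sketch for generate_coasting_beam. The beam object below is a hypothetical stand-in (the real beam class is not shown in this snippet); it only provides the attributes the function reads and writes, and the sketch assumes the module imports numpy as np and numpy.random as rand.

import numpy as np
import numpy.random as rand

class _FakeBeam:                  # hypothetical stand-in for the real beam object
    energy = 450e9                # total energy [eV], assumed value
    beta = 0.999998               # relativistic beta, assumed value
    momentum = 450e9              # momentum [eV/c], assumed value
    n_macroparticles = 10000
    dE = None
    dt = None

beam = _FakeBeam()
generate_coasting_beam(beam, t_start=0.0, t_stop=2.5e-6,
                       spread=1e-3, spread_type='dp/p', distribution='gaussian')
print(beam.dE.std(), beam.dt.min(), beam.dt.max())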
Example #2
def get_pairs(clique_dict):
    """Get all pairs of cover songs in a clique dataset, and a sample of
        non-cover pairs of the same size.

    Args:
        clique_dict (dict): clique dataset as a dict with clique names
            as keys and lists of song URIs as values

    Returns:
        pairs (list): list of cover pairs (each a tuple)
        non_pairs (list): list of sampled non-cover pairs of the same size (each a tuple)
    """
    pairs = []
    non_pairs = []
    for this_clique in clique_dict:

        # clique uris 
        clique_uris = clique_dict[this_clique]

        # non-clique uris
        other_cliques = [clique for clique in clique_dict if not clique == this_clique]
        non_clique_uris = [uri for clique in other_cliques for uri in clique_dict[clique]]
        
        # clique pairs
        clique_pairs = list(combinations(clique_uris, 2))

        # clique non-pairs = [some clique uri, some non-clique uri] x len(clique pairs)
        n_clique_pairs = len(clique_pairs)
        clique_sample = choice(clique_uris, n_clique_pairs, replace=True)
        non_clique_sample = choice(non_clique_uris, n_clique_pairs, replace=False)
        clique_non_pairs = zip(clique_sample, non_clique_sample)
        
        pairs.extend(clique_pairs)
        non_pairs.extend(clique_non_pairs)

    return pairs, non_pairs
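
A small, hypothetical usage sketch for get_pairs, assuming the module imports `from itertools import combinations` and `from numpy.random import choice` as the function requires. Each toy clique has three URIs so that the non-cover sample, drawn without replacement, has enough candidates.

from itertools import combinations
from numpy.random import choice

toy_cliques = {                      # hypothetical clique dataset
    'clique_a': ['uri_1', 'uri_2', 'uri_3'],
    'clique_b': ['uri_4', 'uri_5', 'uri_6'],
}
pairs, non_pairs = get_pairs(toy_cliques)
print(len(pairs), len(non_pairs))    # equal numbers of cover and non-cover pairs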
Example #3
    def run(self, num_epochs=1, num_episodes=1):
        num_tasks = len(self.tasks)
        for epoch in range(num_epochs):
            # choose task based on weights.
            ti = -1
            if npr.rand() < self.mab_gamma:
                ti = npr.choice(range(num_tasks), 1)[0]
            else:
                p = np.exp(prob.normalize_log(self.log_weights))
                ti = npr.choice(range(num_tasks), 1, replace=True, p=p)[0]
            task = self.tasks[ti]

            # (TODO) this breaks the abstraction.
            self.deepQlearn.task = task
            self.dqn.task = task

            # run training.
            self.deepQlearn.run(num_episodes)

            # update weights.
            self.cumulative_epochs += 1
            if self.cumulative_epochs >= self.mab_batch_size:
                self.log_weights[:] = 0.
            else:
                for ti, task in enumerate(self.tasks):
                    performance_gain = eval_policy_reward(self.dqn, task, num_episodes=10000)
                    self.log_weights[ti] += self.mab_gamma * self.mab_scale * performance_gain / num_tasks
Example #4
def recommendNumberBySum(lottoDictByNumber, lottoDictBySum):
    # number of sum values to pick
    pickSumCount = 5

    total = sum(list(lottoDictBySum.values()))
    for key in lottoDictBySum:
        lottoDictBySum[key] = lottoDictBySum[key] / total

    pickSumList = choice(list(lottoDictBySum.keys()), pickSumCount, p=list(lottoDictBySum.values()))

    total = sum(list(lottoDictByNumber.values()))
    for key in lottoDictByNumber:
        lottoDictByNumber[key] = lottoDictByNumber[key] / total

    # list of recommended numbers
    pickNumberList = []

    for n in pickSumList:
        # find the combinations of numbers summing to n and their probabilities
        listNumbers = getNumberCombinationListBySum(n)
        pList = []
        for numbers in listNumbers:
            p = 0
            for number in numbers:
                p += lottoDictByNumber[number]

            pList.append(p)

        pList = [x / sum(pList) for x in pList]
        idx = choice(list(range(0, len(listNumbers))), 1, p=pList)
        pickNumberList.append(listNumbers[idx[0]])

    return pickNumberList
Example #5
    def get_train(self, shape=(100, 20)):
        """Делаем из данных train выборку"""
        train = []
        for key in self.base.keys():
            if len(self.base[key]) >= 2:
                for _ in range(0, len(self.base[key]), 2):
                    values = choice(list(self.base[key]), 2)

                    a = np.asarray(self.base[key][values[0]])
                    b = np.asarray(self.base[key][values[1]])

                    other = choice(list(self.base), 1)[0]
                    c = np.asarray(self.base[other][choice(list(self.base[other]), 1)[0]])

                    other_value = c.copy()
                    value_first = a.copy()
                    value_second = b.copy()

                    other_value.resize(shape)
                    value_first.resize(shape)
                    value_second.resize(shape)

                    train.append((value_first, other_value, value_second))

        return train
Example #6
def next_edge_uniformn(G, start, explore_prob, n, candidates=None):
    '''
    Picks edges with probability proportional to the edge weights raised to the n-th power
    '''
    if candidates is None:
        candidates = list(G.neighbors(start))
    
    total_wt = 0.0
    explored = []
    unexplored = []
    explored_weights = []
    for candidate in candidates:
        wt = G[start][candidate]['weight']
        if (wt ** n) <= MIN_DETECTABLE_PHEROMONE:
            unexplored.append(candidate)
        else:
            explored.append(candidate)
            explored_weights.append(wt ** n)
            total_wt += wt ** n
    flip = random()
    if (flip < explore_prob and len(unexplored) > 0) or (len(explored) == 0):
        next = choice(len(unexplored))
        next = unexplored[next]
        return next, True
    elif total_wt == 0:
        print(explored_weights)
        next = choice(len(candidates))
        next = candidates[next]
        return next, True
    else:
        explored_weights = np.array(explored_weights)
        explored_weights /= total_wt
        next = explored[choice(len(explored), 1, p=explored_weights)[0]]
        return next, False
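
A toy usage sketch on a small networkx graph, assuming the module defines MIN_DETECTABLE_PHEROMONE and imports `from random import random` and `from numpy.random import choice` as the function expects; the threshold value below is a placeholder.

import networkx as nx

MIN_DETECTABLE_PHEROMONE = 0.01      # assumed threshold value
G = nx.Graph()
G.add_edge('nest', 'a', weight=1.0)
G.add_edge('nest', 'b', weight=2.0)
G.add_edge('nest', 'c', weight=0.0)
next_node, explored = next_edge_uniformn(G, 'nest', explore_prob=0.1, n=2)
print(next_node, explored)           # usually 'b' (weight 2**2 dominates); explored is True on explore steps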
Example #7
File: felix.py  Project: bcmd/BCMD
def mswdd ( x, alpha=1e-5, nlevels=6, boundary=100, prop=0.1 ):
    # pad to the next power of two in size
    N = len(x)
    maxlevs = int(np.ceil(np.log2(N)))
    newlen = 2 ** (1 + maxlevs)
    padlen = newlen - N
    boundary = int(np.min((boundary, np.floor(prop * N))))
    padbefore = rng.choice(x[0:boundary], int(np.ceil(padlen/2)))
    padafter = rng.choice(x[(N-boundary+1):N], int(np.floor(padlen/2)))
    padded = np.concatenate((padbefore, x, padafter))
    
    # get wavelet transform
    J = int(np.min((nlevels + 1, maxlevs + 1)))
    vsg = np.reshape(wv.dwt.swt(padded, J, 'db1')[0], (J, newlen))

    # shift rows to align the scale levels
    shift = newlen // 2
    for ii in range(1, vsg.shape[0]):
        idx = list(range(newlen - shift, newlen))
        idx.extend(range(newlen - shift))
        vsg[ii,] = vsg[ii, idx]
        shift = shift // 2
    
    # drop 1st (DC) row and padding
    vsg = vsg[1:,len(padbefore):(len(padbefore)+N)]
    
    return discontinuities(vsg, alpha)
Example #8
def next_edge_maxa(G, start, explore_prob, candidates=None):
    '''
    With some probability, picks equally among all candidate edges (an explore step).
    Otherwise, picks equally among all edges tied for the highest edge weight. Note
    that on explore steps it can still pick the highest-weighted edge.
    '''
    if candidates is None:
        candidates = list(G.neighbors(start))
    
    # compute highest adjacent edge weight    
    max_wt = float("-inf")
    for candidate in candidates:
        max_wt = max(max_wt, G[start][candidate]['weight'])
    
    # split neighbors into maximally weighted and non-maximally weighted edges
    max_neighbors = []
    nonmax_neighbors = []
    for candidate in candidates:
        wt = G[start][candidate]['weight']
        # Edges with too small weight not considered maximal
        if wt == max_wt and wt > MIN_DETECTABLE_PHEROMONE:
            max_neighbors.append(candidate)
        else:
            nonmax_neighbors.append(candidate)
            
    flip = random()
    # Explores non-maximal edge with probability explore_prob
    if (flip < explore_prob and len(nonmax_neighbors) > 0) or (len(max_neighbors) == 0):
        next = choice(len(candidates))
        next = candidates[next]
        return next, True
    else:
        next = choice(len(max_neighbors))
        next = max_neighbors[next]
        return next, False
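
A similar toy sketch for next_edge_maxa, under the same assumptions: MIN_DETECTABLE_PHEROMONE, `from random import random` and `from numpy.random import choice` are defined at module level, and the threshold value is a placeholder.

import networkx as nx

MIN_DETECTABLE_PHEROMONE = 0.01   # assumed threshold value
G = nx.Graph()
G.add_edge('nest', 'a', weight=3.0)
G.add_edge('nest', 'b', weight=3.0)
G.add_edge('nest', 'c', weight=0.5)
next_node, explored = next_edge_maxa(G, 'nest', explore_prob=0.2)
print(next_node, explored)        # usually 'a' or 'b'; 'c' is only possible on an explore step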
Example #9
def next_edge_maxz(G, start, explore_prob, candidates=None):
    '''
    With some probability, picks equally among zero edges, otherwise picks equally among
    maximal edges. This choice function ignores all edges in the 'middle', i.e. edges that
    are neither maximal nor minimal
    '''
    if candidates is None:
        candidates = list(G.neighbors(start))
        
    max_wt = float("-inf")
    for candidate in candidates:
        max_wt = max(max_wt, G[start][candidate]['weight'])
    
    max_neighbors = []
    nonmax_neighbors = []
    for candidate in candidates:
        wt = G[start][candidate]['weight']
        if wt == max_wt and wt > MIN_DETECTABLE_PHEROMONE:
            max_neighbors.append(candidate)
        elif wt <= MIN_DETECTABLE_PHEROMONE:
            nonmax_neighbors.append(candidate)
            
    flip = random()
    if (flip < explore_prob and len(nonmax_neighbors) > 0) or (len(max_neighbors) == 0):
        next = choice(len(nonmax_neighbors))
        next = nonmax_neighbors[next]
        return next, True
    else:
        next = choice(len(max_neighbors))
        next = max_neighbors[next]
        return next, False
Example #10
    def recursive(self, opened):
        sample = choice(len(self.probabilities), 1, p=self.probabilities)
        sample = sample[0]
        length = 2
        # ans = self.open_mark[opened]
        ans = [opened]
        while (sample != opened):
            if (sample < opened):
                # ans.append(0)
                # length += 1
                pass

            # Open a new depth of recursion
            else:
                (new_depth, new_length) = self.recursive(sample)
                ans.extend(new_depth)
                length += new_length

            sample = choice(len(self.probabilities), 1, p=self.probabilities)
            sample = sample[0]

        # Close the depth of recursion
        # return (ans + self.close_mark[sample], length)
        ans.append(opened + self.depth)
        return (ans, length)
Example #11
        def _deplacement_alea(individu):
            """Déplace d'une case un individu.

            On choisit d'abord le déplacement que va effectuer l'indivu
            puis on vérifie que celui-ci est dans les limites de la matrice,
            ne va pas dans l'eau ou sur un autre ennemi.
            On change la l'attribu position de l'individu de manière à
            ce qu'il soit déplacé plus tard.

            """
            alea_ligne = rd.choice((-1, 0, 1))
            alea_colonne = rd.choice((-1, 0, 1))
            x_ini = deepcopy(individu.position[0])
            y_ini = deepcopy(individu.position[1])

            # Check that the move stays within the bounds of the matrix.
            if 0 <= x_ini + alea_ligne and x_ini + alea_ligne < self.matrice.shape[0]:
                if int(str(self.matrice[individu.position[0] + alea_ligne, individu.position[1]])) < 100 and int(str(self.matrice[individu.position[0] + alea_ligne, individu.position[1]])) != 0:
                    individu.position[0] += alea_ligne
                
            if 0 <= y_ini + alea_colonne and y_ini + alea_colonne < self.matrice.shape[1]:
                if int(str(self.matrice[individu.position[0], individu.position[1] + alea_colonne])) < 100 and int(str(self.matrice[individu.position[0], individu.position[1] + alea_colonne])) != 0:
                    individu.position[1] += alea_colonne

            individu.ex_position = [x_ini, y_ini]
Example #12
def shell_move(inAtom,atomIndex):  
  #  we're going to be changing the position of atomIndex inside inAtom
  #  make sure that you remove any crazy outliers before you do this
  #  or else it'll just make a bunch more outliers, which is a poor idea
  #  make sure atomIndex comes from range(len(inAtom.get_positions())) so we don't get out of bounds
  try:
    inCOM = inAtom.get_center_of_mass()
    inDistances = distanceCenter(inAtom)
    
    ninetyNinthRadius = stats.scoreatpercentile(inDistances,99)
    ninetyFifthRadius = stats.scoreatpercentile(inDistances,95) 
    outerFourRadius = ninetyNinthRadius - ninetyFifthRadius
    
    randomNewRadius = random.gauss( (ninetyNinthRadius+ninetyFifthRadius)/2 , (ninetyNinthRadius - ninetyFifthRadius)/2 )
    xFromCenter = random.uniform(0,randomNewRadius)
    randomNewRadius = ((randomNewRadius**2) - (xFromCenter**2))**0.5
    yFromCenter = random.uniform(0,randomNewRadius)
    zFromCenter = ((randomNewRadius**2) - (yFromCenter**2))**0.5
    
    newXPosition = inCOM[0] + random.choice([-1,1])*xFromCenter
    newYPosition = inCOM[1] + random.choice([-1,1])*yFromCenter
    newZPosition = inCOM[2] + random.choice([-1,1])*zFromCenter
    
    positionArray = inAtom.get_positions()
    positionArray[atomIndex] = (newXPosition,newYPosition,newZPosition)
    inAtom.set_positions(positionArray)
    
    return inAtom
    
  except IndexError:
    print "The index of the atom you wanted to move is too high or too low."
    print "Please check your function call of shell_move(a,b)"
    print "-Jeff"
Example #13
    def transition(self):
        text = "Transition to"
        if self.current_state is not None:
            self.current_state = choice(range(self.nStates), p=self.transitionMatrix[self.current_state,])
        else:
            text = "Start at"
            self.current_state = choice(range(self.nStates), p=self.initialProbabilities)
Example #14
def setup_images(current_uuid, subreddit, num_questions):

    thresholds, weights = get_thresholds(subreddit)

    image_classes = []

    query_1 = Post.query.filter(Post.year_posted == 2014, Post.show_to_users == "t", Post.subreddit == subreddit)
    for t in thresholds:
        temp_images = query_1.filter(Post.score >= t[0], Post.score <= t[1]).order_by(db.func.random())
        image_classes.append(temp_images)

    indices = [0] * len(thresholds)
    image_pairs = []
    for i in np.arange(num_questions):
        first_threshold = choice(np.arange(len(thresholds)), p=weights)
        second_threshold = choice(np.arange(len(thresholds)), p=weights)

        first_index = indices[first_threshold]
        first_image = image_classes[first_threshold].offset(first_index).first()
        indices[first_threshold] = first_index + 1

        second_index = indices[second_threshold]

        second_image = image_classes[second_threshold].offset(second_index).first()
        while first_image.score == second_image.score:
            second_index += 1
            second_image = image_classes[second_threshold].offset(second_index).first()
        indices[second_threshold] = second_index + 1
        image_pairs.append([first_image, second_image])

    mc = pylibmc.Client(["127.0.0.1"], binary=True, behaviors={"tcp_nodelay": True, "ketama": True})
    mc.set(current_uuid + "_images", image_pairs, time=10 * 60)

    return image_pairs
Example #15
def ball_move(inAtom,atomIndex):
  """takes an atom defined by atomIndex inside of inAtom
  and moves it somewhere within the core of the atom randomly.
  Atoms will almost always end up inside the sphere which
  contains 85% of the atoms, centered at the center of mass."""
  #  we're going to be changing the position of atomIndex inside inAtom
  #  we'll take atom of index atomIndex and throw it somewhere inside the core
  #  make sure that you remove any crazy outliers before you do this
  #  or else it'll just make a bunch more outliers, which is a poor idea
  try:
    #get all the distances from the center of mass
    inCOM = inAtom.get_center_of_mass()
    inDistances = distanceCenter(inAtom)
    #figure out the distance from the core to the 85th percentile
    #we'll consider "the core" to be the sphere which contains 85% of the atoms
    eightyFifthRadius = stats.scoreatpercentile(inDistances,85)
    #pick a new distance from center somewhere inside that 85th percentile limit
    randomNewRadius = random.gauss(eightyFifthRadius/2, eightyFifthRadius/3 )
    xFromCenter = random.uniform(0,randomNewRadius)
    randomNewRadius = ((randomNewRadius**2) - (xFromCenter**2))**0.5
    yFromCenter = random.uniform(0,randomNewRadius)
    zFromCenter = ((randomNewRadius**2) - (yFromCenter**2))**0.5
    newXPosition = inCOM[0] + random.choice([-1,1])*xFromCenter
    newYPosition = inCOM[1] + random.choice([-1,1])*yFromCenter
    newZPosition = inCOM[2] + random.choice([-1,1])*zFromCenter
    positionArray = inAtom.get_positions()
    positionArray[atomIndex] = (newXPosition,newYPosition,newZPosition)
    inAtom.set_positions(positionArray)
    return inAtom
  except IndexError:
    print "The index of the atom you wanted to move is too high or too low."
    print "Please check your function call of ball_move(a,b)"
    print "-Jeff"
Example #16
def qlearn2(m, f, alpha=0.5, gamma=1.0, epsilon=0.2, num_episodes=1000):
    actions = m.actions()
    theta = dict()
    for a in actions:
        theta[a] = np.zeros((f.num_features,))
    for episode in range(num_episodes):
        t = 0
        s = m.start()
        a = npr.choice(m.actions(s))
        q = 0
        while not m.is_terminal(s):
            s2, r2 = m.act(s, a)
            # if r2 < -1 or s2[0] > 5 and s2[1] < 5:
            #     print "wat", s2, r2
            qp = sum(theta[a] * f.features(s))
            # print a, s, s2, r2, [(a, r2 + gamma * qp) for a in m.actions(s)]
            actions = m.actions(s)
            random.shuffle(actions)
            a2, q2 = max([(a2, r2 + gamma * sum(theta[a2] * f.features(s2))) for a2 in actions], key=lambda x: x[1])
            delta = q2 - qp
            # if random.random() < 0.01:
            #     print delta
            # print a, delta, s, s2
            theta[a] += alpha * delta * (f.features(s))
            if npr.random() < epsilon:
                a2 = npr.choice(m.actions(s))
            q = q2
            s = s2
            a = a2
            t += 1
        print(episode+1, t, s, r2)

    pi = lambda s: max([(a, sum(theta[a] * f.features(s))) for a in m.actions(s)], key=lambda x: x[1])[0]
    f_exp = lambda s: max(sum(theta[a] * f.features(s)) for a in m.actions(s))
    return pi, f_exp, theta
Example #17
def qlearn1(m, f, lmbda=0.1, alpha=0.3, gamma=0.9999, epsilon=0.2, num_episodes=100):
    theta = np.zeros((f.num_features,))
    t = 1
    for _ in range(num_episodes):
        s = m.start()
        a = npr.choice(m.actions(s))
        e = np.zeros((theta.size,))
        print(t)
        while not m.is_terminal(s):
            #print t
            #print s
            phi_a1 = feature_estimate(m, s, a, f)
            e += phi_a1
            s2, r2 = m.act(s, a)
            delta = r2 - sum(theta * phi_a1)
            q = []
            for a2 in m.actions(s2):
                phi_a2 = feature_estimate(m, s2, a2, f)
                q.append(sum(theta * phi_a2))
            delta += (gamma**t) * max(q)
            theta = (1-alpha) * theta + alpha * delta * e
            if npr.random() < 1 - epsilon:
                a2 = max(zip(m.actions(s2), q), key=lambda x: x[1])[0]
                e *= (gamma**t) * lmbda
            else:
                a2 = npr.choice(m.actions(s2))
                e = np.zeros((theta.size,))
            s = s2
            a = a2
            t += 1

    pi = lambda s: max([(a, sum(theta * feature_estimate(m, s, a, f))) for a in m.actions(s)], key=lambda x: x[1])[0]
    return pi, theta
Example #18
    def choose_action(self, state):
        """ The choose_action function is called when the agent is asked to choose
            which action to take, based on the 'state' the smartcab is in. """

        # Set the agent state and default action
        self.state = state
        self.next_waypoint = self.planner.next_waypoint()
        action = None

        ########### 
        ## TO DO ##
        ###########
        # When not learning, choose a random action
        # When learning, choose a random action with 'epsilon' probability
        #   Otherwise, choose an action with the highest Q-value for the current state
        if self.learning:
            rando = random.randint(0, 99)

            if rando < ( self.epsilon * 100 ) :
                action = choice(self.valid_actions)
            else:
                maxQ = self.get_maxQ(state)
                Qs= self.Q[state]
                possible_A = [action for action, value in Qs.items() if value == maxQ]
                action = choice(possible_A)
        else:
            action = choice(self.valid_actions)

        return action
Example #19
def resample(gen, scores, percent_eliminate = .5, mutation_rate = 5):
    """ Breeds generation together based on their scores. Returns new generation. """
    assert(len(gen)==len(scores))           # They must be the same length
    N = len(gen)                               # N is equal to that length
    Ns = int(len(gen)*(1.-percent_eliminate))  # Get the sample size
    P = (np.array(scores, dtype='float')+abs(min(scores))+.001) / np.sum(np.array(scores)+abs(min(scores))+.001) # normalize scores to get probabilities

    # Kill off percent_eliminate of the worst worlds
    survive_index = choice(np.arange(len(gen)), size = Ns, replace = False, p = P)
    gen, scores = [gen[i] for i in survive_index], [scores[i] for i in survive_index]
    P = (np.array(scores, dtype='float')+abs(min(scores))+.001) / np.sum(np.array(scores)+abs(min(scores))+.001) # normalize scores to get probabilities
    
    # Sample the generation based on score, higher the more probable to breed
    A = choice(np.arange(Ns), size = N, replace = True, p = P)
    B = choice(np.arange(Ns), size = N, replace = True, p = P)
    A, B = [gen[i] for i in A], [gen[i] for i in B]
    
    # Generate the next generation
    new_gen, new_scores = [], []
    for a, b in zip(A,B):
        # Generate new world via gene combination and mutation
        new_world = combine(a,b)
        new_world = mutate(new_world, rate = mutation_rate)
        
        # Score the new world
        new_score = score(new_world, safety_weight = Safety, freedom_weight = Freedom)
        
        # Append it to the new generation
        new_gen.append(new_world)
        new_scores.append(new_score)
        
    return new_gen, new_scores
Example #20
    def __init__(self, test_data_fn):
        start = now()
        if os.path.isfile(test_data_fn):
            print("Reading test data...")
            self.prepop_rows, self.idens, self.props, self.rows = \
                pickle.load(open(test_data_fn, 'rb'))
        else:
            print("Generating test data...")
            random.seed(4)  # 4 chosen by fair dice roll.  Guaranteed to be random
            forms = [gen_random_form() for x in range(NUM_FORMS)]
            # FIXME:  don't use random.choice!!! Super duper slow
            self.prepop_rows = flatten(_rows_from_tufo(gen_random_tufo(random.choice(forms)))
                                       for x in range(NUM_PREEXISTING_TUFOS))
            tufos = [gen_random_tufo(random.choice(forms)) for x in range(NUM_TUFOS)]
            self.idens = [t[0] for t in tufos]
            self.props = [get_random_keyval(t[1]) for t in tufos]
            random.shuffle(self.idens)
            random.shuffle(self.props)

            self.rows = flatten(_rows_from_tufo(x) for x in tufos)
            pickle.dump((self.prepop_rows, self.idens, self.props, self.rows),
                        open(test_data_fn, 'wb'))

        print("Test data generation took: %.2f" % (now() - start))
        print('addRows: # Tufos:%8d, # Rows: %8d' % (NUM_TUFOS, len(self.rows)))
        print('len count: small:%d, medium:%d, large:%d, huge:%d' %
              (small_count, medium_count, large_count, huge_count))
Example #21
def sample_profiles(base, num): # pylint: disable=inconsistent-return-statements
    """Generate unique profiles from a game

    Parameters
    ----------
    base : RsGame
        Game to generate random profiles from.
    num : int
        Number of profiles to sample from the game.
    """
    if num == base.num_all_profiles: # pylint: disable=no-else-return
        return base.all_profiles()
    elif num == 0:
        return np.empty((0, base.num_strats), int)
    elif base.num_all_profiles <= np.iinfo(int).max:
        inds = rand.choice(base.num_all_profiles, num, replace=False)
        return base.profile_from_id(inds)
    else:
        # Number of times we have to re-query
        ratio = (sps.digamma(float(base.num_all_profiles)) -
                 sps.digamma(float(base.num_all_profiles - num)))
        # Max is for underflow
        num_per = max(round(float(ratio * base.num_all_profiles)), num)
        profiles = set()
        while len(profiles) < num:
            profiles.update(
                utils.hash_array(p) for p in base.random_profiles(num_per))
        profiles = np.stack([h.array for h in profiles])
        inds = rand.choice(profiles.shape[0], num, replace=False)
        return profiles[inds]
Example #22
def main():
    from matplotlib.pyplot import figure,plot, close
    from numpy.random import standard_normal,choice
    from numpy.linalg import qr, norm
    from numpy import dot, sqrt, real, imag
    import CAMP_C
    #from myOmp import omp_naive as omp
    N=2000
    M=900
    K=100
    sigma_n=0.001
    A=standard_normal((N,N))+1j*standard_normal((N,N))
    (Q,R)=qr(A)
    i=choice(N,M,False)  
    A=Q[i,:]

    x=(standard_normal((N,1))+1j*standard_normal((N,1)))/sqrt(2)
    j=choice(N,N-K,False)
    x[j,:]=0
    
    y=dot(A,x)+sigma_n*standard_normal((M,1))
    xhat=CAMP_C.CAMP(A,y,1,True)
    print(norm(x-xhat)/N)
    close('all')
    plot(real(x))
    plot(real(xhat))
    figure()
    plot(imag(x))
    plot(imag(xhat))
Example #23
def assemble_data(output_file, anno_file_list=[]):
    # assemble the annotations to one file
    size = 12

    if len(anno_file_list) == 0:
        return 0

    if os.path.exists(output_file):
        os.remove(output_file)

    for anno_file in anno_file_list:
        with open(anno_file, 'r') as f:
            anno_lines = f.readlines()

        base_num = 250000

        if len(anno_lines) > base_num * 3:
            idx_keep = npr.choice(len(anno_lines), size=base_num * 3, replace=True)
        elif len(anno_lines) > 100000:
            idx_keep = npr.choice(len(anno_lines), size=len(anno_lines), replace=True)
        else:
            idx_keep = np.arange(len(anno_lines))
            np.random.shuffle(idx_keep)
        chose_count = 0
        with open(output_file, 'a+') as f:
            for idx in idx_keep:
                f.write(anno_lines[idx])
                chose_count += 1

    return chose_count
Example #24
    def choose_action(self, state):
        """ The choose_action function is called when the agent is asked to choose
            which action to take, based on the 'state' the smartcab is in. """

        # Set the agent state and default action
        self.state = state
        self.next_waypoint = self.planner.next_waypoint()
        action = random.choice(self.valid_actions)
        
        # When not learning, choose a random action
        # When learning, choose a random action with 'epsilon' probability
        #   Otherwise, choose an action with the highest Q-value for the current state        
        if self.learning:
            maxQ = self.get_maxQ(state)
            n_maxQ = sum(1 for v in self.Q[str(state)].values() if v == maxQ)

            if n_maxQ > 1:
                maxQ_actions = []
                for k,v in self.Q[str(state)].items():
                    if v == maxQ:
                        maxQ_actions.append(k)            
                action = choice(maxQ_actions)
            else:    
                if choice([True,False], 1, p=[1-self.epsilon, self.epsilon]):
                    for k,v in self.Q[str(state)].items():
                        if v == maxQ:
                            action = k
 
        return action
Example #25
def subsample_arr(arr, N=None, frac_keep=None):
    """
    Subsample a Series, DataFrame, or ndarray along axis 0.

    Parameters
    ----------
    arr : Series, DataFrame, or ndarray
    N : Integer
        Number of samples to keep
    frac_keep : Real in [0, 1]
        Fraction of samples to keep

    Returns
    -------
    subsampled : Series, DataFrame, or ndarray
        A copy
    """
    # Input checking
    assert ((N is None) and (frac_keep is not None)) \
        or ((N is not None) and (frac_keep is None))

    #
    if N is None:
        N = int(len(arr) * frac_keep)

    if isinstance(arr, np.ndarray):
        index = choice(range(len(arr)), size=N, replace=False)
        return arr[np.ix_(index)]
    elif isinstance(arr, (pd.Series, pd.DataFrame)):
        index = choice(arr.index, size=N, replace=False)
        return arr.loc[index]
    else:
        raise ValueError("arr of unhandled type:  %s" % type(arr))
Example #26
    def transform(self, Xb, yb):
        Xb, yb = super(AffineTransformBatchIteratorMixin,
                       self).transform(Xb, yb)
        # Skip if affine_p is 0. Setting affine_p may be useful for quickly
        # disabling affine transformation
        if self.affine_p == 0:
            return Xb, yb

        idx = get_random_idx(Xb, self.affine_p)
        Xb_transformed = Xb.copy()

        for i in idx:
            scale = choice(self.affine_scale_choices)
            rotation = choice(self.affine_rotation_choices)
            shear = choice(self.affine_shear_choices)

            affine_translation_y_choices = self.affine_translation_choices if self.affine_translation_y_choices is None else self.affine_translation_y_choices
            affine_translation_x_choices = self.affine_translation_choices if self.affine_translation_x_choices is None else self.affine_translation_x_choices

            translation_y = choice(affine_translation_y_choices)
            translation_x = choice(affine_translation_x_choices)
            img_transformed, tform = im_affine_transform(
                Xb[i], return_tform=True,
                scale=scale, rotation=rotation,
                shear=shear,
                translation_y=translation_y,
                translation_x=translation_x,
                center_rel=self.center_rel,
            )
            Xb_transformed[i] = img_transformed

        return Xb_transformed, yb
Example #27
 def genArgumentMNIST(self):
     
     print "generating Argument MNIST image in progress..."
     dataset = MNISTDataset("MNIST")
     gendataset = []
     genlabelset = []
     stepsizeX = array([0, 2, 6, 8])
     stepsizeY = array([0, 2, 6, 8])
     
     rd_idx0 = random.choice(60000, 10000, replace=False)
     
     for i in rd_idx0:
         labelTr, itemTr = dataset.getTrainingItem(i)
         temp_tr = reshape( itemTr, (28,28))
         itemTr_pad = zeropadding(temp_tr, 4, 4)
         
         for x in stepsizeX:
             for y in stepsizeY:
                 TEMP = itemTr_pad[x:x+28, y:y+28].flatten()
                 gendataset.append( TEMP )
                 genlabelset.append( labelTr.flatten() )
                 
     print "Argument MNIST image complete!"
     rd_idx1 = random.choice(len(gendataset), self.numArgu, replace=False)
     
     popdatamatrix = zeros((self.numArgu, 28*28))
     poplabelmatrix = zeros((self.numArgu, 10))  
     
     for i in range(self.numArgu):
         popdatamatrix[i,:] = gendataset[rd_idx1[i]]
         poplabelmatrix[i,:] = genlabelset[rd_idx1[i]]
         
     return popdatamatrix, poplabelmatrix
Example #28
    def __generate_random_walks(self,graph, length_of_path, number_per_node):
        """
            For each node in the graph object, this method will compute
            a number of random walks of a certain length.

            Parameters:
                graph: The networkx graph object
                length_of_path: The length of the random walk generated 
                    for each node
                number_per_node: The number of randoms walks generated 
                    for each node
            Returns:
                The list of generated random walks
        """
        random_walks = []
        for i in graph.nodes():
            for j in range(0,number_per_node):
                path = [i]

                for k in range(0,length_of_path):
                    # candidate next steps: neighbours of the current end of the walk not yet visited
                    sample_set = [x for x in graph.neighbors(path[-1]) if x not in path]
                    if len(sample_set) == 0:
                        break
                    sample = npr.choice(sample_set, size=1)
                    path.append(sample[0])

                random_walks.append(path)

        return random_walks
Example #29
    def update(self, t):
        # Gather inputs
        self.next_waypoint = self.planner.next_waypoint()  # from route planner, also displayed by simulator
        inputs = self.env.sense(self)
        deadline = self.env.get_deadline(self)

        if self.next_waypoint is None:
            self.state = 'Destination'
        else:
            self.state = (self.next_waypoint, inputs['light'],inputs['oncoming'],inputs['right'],inputs['left'])

        # TODO: Select action according to your policy

        valid_actions = ['forward','right','left', None]
        epsilon = 0.1 #0.01

        best_action = max(self.Q_hat[self.state],key=self.Q_hat[self.state].get)
        random_action = choice(valid_actions)
        action = choice([best_action, random_action],p=[1-epsilon,epsilon])

        # Execute action and get reward

        reward = self.env.act(self, action)

        # Learn policy based on state, action, reward

        new_next_waypoint = self.planner.next_waypoint()  # from route planner, also displayed by simulator
        new_inputs = self.env.sense(self)
        new_state = (new_next_waypoint, new_inputs['light'],new_inputs['oncoming'],new_inputs['right'],new_inputs['left'])
        alpha = 0.5 #opt 0.7
        gamma = 0.5 #opt 0.1
        max_Qhat_ahat = max(self.Q_hat[new_state].values())
        self.Q_hat[self.state][action] = (1-alpha)*self.Q_hat[self.state][action]+alpha*(reward+gamma*max_Qhat_ahat)

        print "LearningAgent.update(): deadline = {}, inputs = {}, action = {}, reward = {}".format(deadline, inputs, action, reward)  # [debug]
Example #30
    def AddLeafToTree(self, id, diameter):
        """
        Add a single leaf to the tree. The position is guided
        by the diameter parameter which indicates the precentage
        of the maximum possible tree diameter to use.
        """

        # Handle the cases where the tree is empty (or a single node)
        if len(self.__tree.vertices) == 0:
            Leaf(label=id, tree=self.__tree)
        elif len(self.__tree.vertices) == 1:
            Edge(nodes=[self.__tree.vertices[0],
                        Leaf(label=id, tree=self.__tree)],
                 tree=self.__tree)
        else:
            # Find the edges which will (and will not) increase the diameter
            (will, willnot)  = self.__PartitionEdges()

            # if no unmarked edges exist or we want to increase diameter
            # randomly select an edge to split

            if not len(willnot) or random.random() > (1.0 - diameter):
                esplit = random.choice(will)
            else:
                esplit = random.choice(willnot)

            # Add new taxa splitting the edge
            self.__SplitEdge(esplit, id)
Example #31
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride = [16,], anchor_scales = [16,]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.
    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not
    dontcare_areas: (D, 4), some areas may contain small objects but no labelling. D may be 0
    im_info: a list of [image_height, image_width, scale_ratios]
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_labels : (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
    rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes (possibly transformed)
                            that are the regression objectives
    rpn_bbox_inside_weights: (HxWxA, 4) weights of each box, mainly set from hyper params in cfg
    rpn_bbox_outside_weights: (HxWxA, 4) used to balance the fg/bg,
                            because the numbers of bgs and fgs may differ significantly
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))  # generate the base anchors, 9 in total
    _num_anchors = _anchors.shape[0]  # 9 anchors

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print((np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        ))))
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border =  0
    # map of shape (..., H, W)
    #height, width = rpn_cls_score.shape[1:3]

    im_info = im_info[0]  # image height, width and number of channels

    # locate the anchors on the feature map and add the deltas to get the anchors' true coordinates in the original image
    # Algorithm:
    # for each (H, W) location i
    #   generate 9 anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    # measure GT overlap

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]  # height and width of the feature map

    if DEBUG:
        print(('AnchorTargetLayer: height', height, 'width', width))
        print('')
        print(('im_size: ({}, {})'.format(im_info[0], im_info[1])))
        print(('scale: {}'.format(im_info[2])))
        print(('height, width: ({}, {})'.format(height, width)))
        print(('rpn: gt_boxes.shape', gt_boxes.shape))
        print(('rpn: gt_boxes', gt_boxes))

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y) # in W H order
    # K is H x W
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()  # offsets between anchors on the feature map and in the original image
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors  # 9 anchors
    K = shifts.shape[0]  # e.g. 50*37, the width times the height of the feature map
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))  # broadcast over the spatial positions, then add
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    # keep only the anchors that lie inside the image; drop those that cross the boundary
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)    # height
    )[0]

    if DEBUG:
        print(('total_anchors', total_anchors))
        print(('inds_inside', len(inds_inside)))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]  # keep the anchors inside the image
    if DEBUG:
        print(('anchors.shape', anchors.shape))

    # at this point the anchors are ready
    #--------------------------------------------------------------
    # label: 1 is positive, 0 is negative, -1 is dont care
    # (A)
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)  # initialise all labels to -1

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt), shape is A x G
    # compute the overlap between anchors and gt boxes, used to assign labels to the anchors
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float),
        np.ascontiguousarray(gt_boxes, dtype=np.float))  # with x anchors and y gt_boxes this returns an (x, y) array
    # holding the overlap between every anchor and every gt box
    argmax_overlaps = overlaps.argmax(axis=1)  # (A) for each anchor, the index of the gt box with the largest overlap
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # (G) for each gt box, the index of the anchor with the largest overlap
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0  # label the background first: anchors with overlap below the negative threshold (0.3)

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1  # the anchor with the largest overlap for each gt box is foreground
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1  # anchors with overlap above the positive threshold (0.7) are foreground

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # preclude dontcare areas
    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:  # dontcare areas are not considered here for now
        # intersec shape is D x A
        intersecs = bbox_intersections(
            np.ascontiguousarray(dontcare_areas, dtype=np.float), # D x 4
            np.ascontiguousarray(anchors, dtype=np.float) # A x 4
        )
        intersecs_ = intersecs.sum(axis=0) # A x 1
        labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1

    # hard samples are not considered here for now
    # preclude hard samples that are highly occlusioned, truncated or difficult to see
    if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[0] > 0:
        assert gt_ishard.shape[0] == gt_boxes.shape[0]
        gt_ishard = gt_ishard.astype(int)
        gt_hardboxes = gt_boxes[gt_ishard == 1, :]
        if gt_hardboxes.shape[0] > 0:
            # H x A
            hard_overlaps = bbox_overlaps(
                np.ascontiguousarray(gt_hardboxes, dtype=np.float), # H x 4
                np.ascontiguousarray(anchors, dtype=np.float)) # A x 4
            hard_max_overlaps = hard_overlaps.max(axis=0)  # (A)
            labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
            max_intersec_label_inds = hard_overlaps.argmax(axis=1) # H x 1
            labels[max_intersec_label_inds] = -1 #

    # subsample positive labels if we have too many
    # subsample the positive labels if there are too many;
    # cap the number of positives at 128
    # TODO this may need revisiting later; with character fragments the number of positives is large
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)  # randomly drop some positives
        labels[disable_inds] = -1  # set them to -1

    # subsample negative labels if we have too many
    # subsample the negative labels if there are too many;
    # the total number of samples is 256 with at most 128 positives;
    # if there are fewer than 128 positives, fill the remainder with negatives to reach 256
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1
        #print "was %s inds, disabling %s, now %s inds" % (
            #len(bg_inds), len(disable_inds), np.sum(labels == 0))

    # labels assigned; now compute the ground-truth rpn box regression targets
    #--------------------------------------------------------------
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])  # regression targets computed from the anchors and gt boxes (the offsets between them)


    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)  # inside weights: 1 for foreground, 0 otherwise

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:  # use uniform weights for now, i.e. 1 for positives and 0 for negatives
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0) + 1
        # positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        # negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            (np.sum(labels == 1)) + 1)
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            (np.sum(labels == 0)) + 1)
    bbox_outside_weights[labels == 1, :] = positive_weights#外部权重,前景是1,背景是0
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors
    # anchors outside the image were dropped at the start; add them back here
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)  # these anchors get label -1, i.e. dontcare
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)  # their targets are 0, i.e. no value
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)  # inside weights filled with 0
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)  # outside weights filled with 0

    if DEBUG:
        print(('rpn: max max_overlap', np.max(max_overlaps)))
        print(('rpn: num_positive', np.sum(labels == 1)))
        print(('rpn: num_negative', np.sum(labels == 0)))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print(('rpn: num_positive avg', _fg_sum / _count))
        print(('rpn: num_negative avg', _bg_sum / _count))

    # labels
    labels = labels.reshape((1, height, width, A))  # reshape the labels
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))#reshape

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Example #32
random.seed(42)

n_tests = 10000

winning_doors = random.randint(0, 3, n_tests)
change_mind_wins = 0
insist_wins = 0

for winning_door in winning_doors:

    first_try = random.randint(0, 3)
    remaining_choices = [i for i in range(3) if i != first_try]
    wrong_choices = [i for i in range(3) if i != winning_door]

    if first_try in wrong_choices:
        wrong_choices.remove(first_try)

    screened_out = random.choice(wrong_choices)
    remaining_choices.remove(screened_out)

    changed_mind_try = remaining_choices[0]

    change_mind_wins += 1 if changed_mind_try == winning_door else 0
    insist_wins += 1 if first_try == winning_door else 0

print(
    'You win {1} out of {0} tests if you changed your mind\n'
    'You win {2} out of {0} tests if you insist on the initial choice'.format(
        n_tests, change_mind_wins, insist_wins))
Example #33
def sample_except(limit, excluded):
    candidate = nr.choice(limit - 1)
    if candidate >= excluded:
        candidate += 1
    return candidate
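
A quick sketch showing that sample_except never returns the excluded value, assuming `import numpy.random as nr` at module level.

import numpy.random as nr

draws = [sample_except(5, excluded=2) for _ in range(20)]
print(draws)                  # values drawn uniformly from {0, 1, 3, 4}, never 2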
Example #34
def sample_rois(rois,
                fg_rois_per_image,
                rois_per_image,
                num_classes,
                cfg,
                labels=None,
                overlaps=None,
                bbox_targets=None,
                gt_boxes=None):
    """
    generate random sample of ROIs comprising foreground and background examples
    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (labels, rois, bbox_targets, bbox_weights)
    """
    if labels is None:
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float),
                                 gt_boxes[:, :4].astype(np.float))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes,
                                size=fg_rois_per_this_image,
                                replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI)
                          & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
                                        bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes,
                                size=bg_rois_per_this_image,
                                replace=False)

    # indexes selected
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels
    labels = labels[keep_indexes]
    # set labels of bg_rois to be 0
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:],
                                 gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) /
                       np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
Example #35
def split(_fts, _lbs, test_ratio):
    indices = range(_lbs.size)
    i_test = rnd.choice(indices, size=round(test_ratio * _lbs.size))
    i_train = np.array([i for i in indices if i not in i_test])
    return (_fts[i_train, :], _lbs[i_train]), (_fts[i_test, :], _lbs[i_test])
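
A small sketch of split on synthetic data, assuming the module imports `import numpy as np` and `import numpy.random as rnd`; note that the test indices are drawn with numpy.random.choice using the default replace=True, so the test set may contain repeated rows.

import numpy as np
import numpy.random as rnd

features = np.arange(40).reshape(20, 2)
labels = np.arange(20)
(train_x, train_y), (test_x, test_y) = split(features, labels, test_ratio=0.25)
print(train_x.shape, test_x.shape)    # roughly a 15/5 row split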
Example #36
sys.stderr.write("Calculating intra-gene LD...\n")
# calculate SFS
for gene_name in allele_counts_map.keys():
    print(gene_name)
    locations = numpy.array([location for chromosome, location in allele_counts_map[gene_name]['4D']['locations']])*1.0
    allele_counts = allele_counts_map[gene_name]['4D']['alleles']
        
    if len(allele_counts)==0:
        # no diversity to look at!
        continue
        
    # pick a random gene somewhere else as a control
    control_gene_name = gene_name
    control_allele_counts = []
    while gene_name==control_gene_name or len(control_allele_counts)==0:
        control_gene_name = choice(list(allele_counts_map.keys()))
        control_allele_counts = allele_counts_map[control_gene_name]['4D']['alleles']
        
        
    allele_counts = allele_counts[:,desired_samples,:]
    control_allele_counts = control_allele_counts[:,desired_samples,:]
    
    #compute the distances between all pairs of sites 
    # None in the two index positions results in a transpose of the vector relative to each other
    # Subtraction between the two vectors results in pairwise subtraction of each element in each vector.
    distances = numpy.fabs(locations[:,None]-locations[None,:])
  
    low_freq=0.3
    high_freq=0.5
    rsquared_numerators, rsquared_denominators = diversity_utils.calculate_rsquared_condition_freq(allele_counts, allele_counts, low_freq, high_freq)
    control_rsquared_numerators, control_rsquared_denominators = diversity_utils.calculate_rsquared_condition_freq(allele_counts, control_allele_counts, low_freq, high_freq)
Example #37
def ADAM(A_input,
         b_input,
         x_input,
         probDist,
         maxIters,
         errorType=0,
         utType="Reg",
         tpType="Reg",
         momentumMult=0.1):
    numStates, numFeatures = A_input.shape

    A_proc, b_proc = np.array(A_input, copy=True), np.array(b_input, copy=True)
    x_first = np.array(x_input, copy=True).reshape(numFeatures)
    x_last = x_first
    x_proc = np.array(x_input, copy=True).reshape(numFeatures)
    x_prev = x_proc
    # Error vectors
    errors = np.zeros(maxIters)
    errors[0] = errorCalcs.getErrorMethod(A_proc,
                                          b_proc,
                                          x_proc,
                                          probDist,
                                          errorType,
                                          norm=2)
    iters = 1
    beta2 = 0.999
    beta1 = 0.9
    meanSquareGradientAccumulator = np.zeros(x_proc.shape)
    momentumAccumulator = np.zeros(x_proc.shape)
    while iters < maxIters:
        numSamples = 3

        sampledRows = random.choice(numStates, numSamples, p=probDist)
        tp1 = TPCore.TPAlgosampledRows(A_proc,
                                       b_proc,
                                       x_proc,
                                       sampledRows=sampledRows)
        tp2 = TPCore.TPAlgosampledRows(A_proc,
                                       b_proc,
                                       tp1,
                                       sampledRows=sampledRows)
        dTP1 = tp1 - x_proc
        dTP2 = tp2 - tp1
        ddTP = dTP2 - dTP1

        kappa = utils.twoNorm(ddTP) / (utils.twoNorm(dTP1))**2
        # Radius of osculating circle
        radius = 1 / kappa
        radiusByNorm_dTP1 = radius / utils.twoNorm(dTP1)

        alpha = 1 / (iters * numSamples / numStates + 1)
        alpha = alpha * radiusByNorm_dTP1
        # Notice that we have multiplied and divided by utils.twoNorm(dTP1) once,
        # which was done for clarity and may be skipped.

        # alpha = alpha * radius
        momentumAccumulator = beta1 * momentumAccumulator + (1 - beta1) * dTP1
        meanSquareGradientAccumulator = beta2 * meanSquareGradientAccumulator + (
            1 - beta2) * dTP1**2
        mHat = momentumAccumulator / (1 - beta1**(iters + 1))
        vHat = meanSquareGradientAccumulator / (1 - beta2**(iters + 1))
        epsilon = 1e-6
        # momentumTerm = alpha * mHat / ((vHat) ** 0.5 + epsilon) - alpha * dTP1
        momentumTerm = alpha * mHat / ((vHat)**0.5 + epsilon)

        x_prev = x_proc
        x_proc = x_proc + momentumTerm

        errors[iters] = errorCalcs.getErrorMethod(A_proc,
                                                  b_proc,
                                                  x_proc,
                                                  probDist,
                                                  errorType,
                                                  norm=2)
        iters += 1

    return x_proc, errors
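# The core of the loop above is ADAM-style bias-corrected moment accumulation
# (momentumAccumulator / meanSquareGradientAccumulator with the 1 - beta**t
# corrections). A generic sketch of that same scheme on a toy quadratic,
# independent of TPCore/errorCalcs; step size and iteration count are
# arbitrary illustration values:
import numpy as np

def toy_adam(grad, x0, alpha=0.1, beta1=0.9, beta2=0.999, eps=1e-6, iters=200):
    x = np.array(x0, dtype=float)
    m = np.zeros_like(x)  # first-moment accumulator (momentum)
    v = np.zeros_like(x)  # second-moment accumulator (mean squared gradient)
    for t in range(1, iters + 1):
        g = grad(x)
        m = beta1 * m + (1 - beta1) * g
        v = beta2 * v + (1 - beta2) * g ** 2
        m_hat = m / (1 - beta1 ** t)  # bias correction
        v_hat = v / (1 - beta2 ** t)
        x = x - alpha * m_hat / (np.sqrt(v_hat) + eps)
    return x

# minimize ||x - (1, -2)||^2; its gradient is 2 * (x - target)
print(toy_adam(lambda x: 2 * (x - np.array([1.0, -2.0])), x0=[0.0, 0.0]))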
示例#38
0
    def forward(self, arguments, outputs, device=None, outputs_to_retain=None):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate 9 anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the 9 anchors
        # filter out-of-image anchors
        # measure GT overlap

        bottom = arguments

        # map of shape (..., H, W)
        height, width = bottom[0].shape[-2:]
        # GT boxes (x1, y1, x2, y2, label)
        gt_boxes = bottom[1][0, :]
        # im_info
        im_info = bottom[2]

        # remove zero padded ground truth boxes
        keep = np.where(((gt_boxes[:, 2] - gt_boxes[:, 0]) > 0)
                        & ((gt_boxes[:, 3] - gt_boxes[:, 1]) > 0))
        gt_boxes = gt_boxes[keep]

        if DEBUG:
            print('')
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))
            print('height, width: ({}, {})'.format(height, width))
            print('rpn: gt_boxes.shape', gt_boxes.shape)
            #print ('rpn: gt_boxes', gt_boxes)

        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        all_anchors = (self._anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -self._allowed_border)
            & (all_anchors[:, 1] >= -self._allowed_border)
            & (all_anchors[:, 2] < im_info[1] + self._allowed_border)  # width
            & (all_anchors[:, 3] < im_info[0] + self._allowed_border)  # height
        )[0]

        if DEBUG:
            print('total_anchors', total_anchors)
            print('inds_inside', len(inds_inside))

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if DEBUG:
            print('anchors.shape', anchors.shape)

        # label: 1 is positive, 0 is negative, -1 is don't care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        # overlaps between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float),
            np.ascontiguousarray(gt_boxes, dtype=np.float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg["TRAIN"].RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg["TRAIN"].RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg["TRAIN"].RPN_POSITIVE_OVERLAP] = 1

        if cfg["TRAIN"].RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg["TRAIN"].RPN_NEGATIVE_OVERLAP] = 0

        # subsample positive labels if we have too many
        num_fg = int(cfg["TRAIN"].RPN_FG_FRACTION * cfg["TRAIN"].RPN_BATCHSIZE)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            if self._determininistic_mode:
                disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
            else:
                disable_inds = npr.choice(fg_inds,
                                          size=(len(fg_inds) - num_fg),
                                          replace=False)
            labels[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = cfg["TRAIN"].RPN_BATCHSIZE - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            if self._determininistic_mode:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            else:
                disable_inds = npr.choice(bg_inds,
                                          size=(len(bg_inds) - num_bg),
                                          replace=False)
            labels[disable_inds] = -1

        bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

        bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))

        if DEBUG:
            self._sums += bbox_targets[labels == 1, :].sum(axis=0)
            self._squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
            self._counts += np.sum(labels == 1)
            means = self._sums / self._counts
            stds = np.sqrt(self._squared_sums / self._counts - means**2)
            print('means:')
            print(means)
            print('stdevs:')
            print(stds)

        # map up to original set of anchors
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_inside_weights = _unmap(bbox_inside_weights,
                                     total_anchors,
                                     inds_inside,
                                     fill=0)

        if DEBUG:
            print('rpn: max max_overlap', np.max(max_overlaps))
            print('rpn: num_positive', np.sum(labels == 1))
            print('rpn: num_negative', np.sum(labels == 0))
            self._fg_sum += np.sum(labels == 1)
            self._bg_sum += np.sum(labels == 0)
            self._count += 1
            print('rpn: num_positive avg', self._fg_sum / self._count)
            print('rpn: num_negative avg', self._bg_sum / self._count)

        # labels
        labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        outputs[self.outputs[0]] = np.ascontiguousarray(labels)

        # bbox_targets
        bbox_targets = bbox_targets.reshape(
            (1, height, width, A * 4)).transpose(0, 3, 1, 2)
        outputs[self.outputs[1]] = np.ascontiguousarray(bbox_targets)

        # bbox_inside_weights
        bbox_inside_weights = bbox_inside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_inside_weights.shape[2] == height
        assert bbox_inside_weights.shape[3] == width
        outputs[self.outputs[2]] = np.ascontiguousarray(bbox_inside_weights)

        # No state needs to be passed to backward() so we just pass None
        return None
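# The (1, A, 4) + (K, 1, 4) broadcast used above to place every base anchor at
# every feature-map cell can be shown in isolation. A tiny 2x2 feature map, two
# made-up base anchors and a stride of 16 are assumed for illustration:
import numpy as np

feat_stride = 16
fm_height, fm_width = 2, 2
base_anchors = np.array([[-8, -8, 8, 8],      # (x1, y1, x2, y2)
                         [-16, -8, 16, 8]])
A = base_anchors.shape[0]

shift_x, shift_y = np.meshgrid(np.arange(fm_width) * feat_stride,
                               np.arange(fm_height) * feat_stride)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()
K = shifts.shape[0]

# (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4): every anchor at every cell
all_anchors = (base_anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2))).reshape((K * A, 4))
print(all_anchors.shape)  # (8, 4) == (K * A, 4)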
示例#39
0
 def randomLocPair(self, study_id):
     activeLocID = self.study2activeLocID[study_id]
     a, b = rnd.choice(activeLocID.keys(), size=2, replace=False)
     return self.locs[a], self.locs[b]
示例#40
0
    def getNextRule(self, validRulesRS, validRulesRSint, time):
        #for x in self.communicatedIntentions:
        #print("TL is", self.getName(),". The intention is", self.communicatedIntentions[x].getAction())
        self.numOfRulesSelected += 1
        # First, select a rule from RS and communicate it
        intendedRule = self.getAssignedIndividual().selectRule(
            validRulesRS)  # Get intended rule to apply
        #print("Intended rule is", intendedRule, "!\n\n\n")
        if intendedRule == -1:
            self.numOfTimesNoRSRuleWasValid += 1
            if self.currentRule is None or self.currentRule == -1:
                self.setIntention(
                    Intention(self,
                              len(self.getAgentPool().getActionSet()) - 1,
                              time))  # Return the Do Nothing action
            else:
                #print("Using current rule instead. It is", self.currentRule)
                self.setIntention(
                    Intention(self, self.currentRule.getAction(), time))
        else:
            if self.currentRule is None or self.currentRule == -1:
                #print('In else. Intended rule is', intendedRule)
                self.setIntention(
                    Intention(self,
                              len(self.getAgentPool().getActionSet()) - 1,
                              time))
            else:
                self.setIntention(
                    Intention(self, intendedRule.getAction(), time))

            # If intended rule isn't user-defined, select a rule from RSint and then decide between the two
        coopRule = self.getAssignedIndividual().selectCoopRule(validRulesRSint)
        if coopRule == -1:
            self.numOfTimesNoCoopRuleWasValid += 1
            #print("No valid rule from RSint.")

        if intendedRule == -1 and coopRule == -1:
            #print("Neither intended nor coopRule valid.")
            if self.currentRule is None or self.currentRule == -1:
                #print('In if statement. Current rule is', self.currentRule)
                self.setIntention(
                    Intention(self,
                              len(self.getAgentPool().getActionSet()) - 1,
                              time))
                return -1
            else:
                #print("Returning currentRule with action", self.currentRule.getAction())
                self.setIntention(
                    Intention(self, self.currentRule.getAction(), time))
                return self.currentRule
            # If no valid rules apply from RSint, return the intented rule from RS
        elif coopRule == -1 and intendedRule != -1:
            #print("CoopRule invalid. Applying intended rule:", intendedRule)
            self.setIntention(Intention(self, intendedRule.getAction(), time))
            return intendedRule

        elif coopRule != -1 and intendedRule == -1:
            #print("Intended rule invalid. Applying coop rule:", coopRule)
            self.setIntention(Intention(self, coopRule.getAction(), time))
            return coopRule

        elif coopRule.getWeight() >= intendedRule.getWeight():
            #print("CoopRule has higher weight than intended rule. Applying it:", coopRule)
            self.setIntention(Intention(self, coopRule.getAction(), time))
            return coopRule
        else:
            rule = choice([coopRule, intendedRule], 1, p=[
                pCoop, (1 - pCoop)
            ])  # Select one of the two rules based on pCoop value
            #print("The rule options are", rule, "and we chose", rule[0])
            self.setIntention(Intention(self, rule[0].getAction(), time))
            return rule[
                0]  # Choice returns an array, so we take the only element of it
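# The last branch above picks between coopRule and intendedRule with
# numpy.random.choice and weights [pCoop, 1 - pCoop]; pCoop is not defined in
# the snippet itself. A standalone sketch of that pattern, with 0.7 as a purely
# illustrative stand-in for pCoop:
from numpy.random import choice

pCoop = 0.7
options = ['coopRule', 'intendedRule']
picked = choice(options, 1, p=[pCoop, 1 - pCoop])
print(picked[0])  # choice returns an array, so take its only element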
示例#41
0
def my_rand(i, w):
    normed = [elem / sum(w) for elem in w]
    return choice(i, p=normed)
示例#42
0
 def getRandomStudy(self):
     return self.getStudy(self.biased_studs[rnd.choice(
         len(self.biased_studs))]['_id'])
示例#43
0
    def __call__(self, image, masks, boxes=None, labels=None):
        height, width, _ = image.shape
        while True:
            # randomly choose a mode
            mode = random.choice(self.sample_options)
            if mode is None:
                return image, masks, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # max trials (50)
            for _ in range(50):
                current_image = image

                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # aspect ratio constraint b/t .5 & 2
                if h / w < 0.5 or h / w > 2:
                    continue

                left = random.uniform(width - w)
                top = random.uniform(height - h)

                # convert to integer rect x1,y1,x2,y2
                rect = np.array(
                    [int(left),
                     int(top),
                     int(left + w),
                     int(top + h)])

                # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
                overlap = jaccard_numpy(boxes, rect)

                # This piece of code is bugged and does nothing:
                # https://github.com/amdegroot/ssd.pytorch/issues/68
                #
                # However, when I fixed it with overlap.max() < min_iou,
                # it cut the mAP in half (after 8k iterations). So it stays.
                #
                # is min and max overlap constraint satisfied? if not try again
                if overlap.min() < min_iou and max_iou < overlap.max():
                    continue

                # cut the crop from the image
                current_image = current_image[rect[1]:rect[3],
                                              rect[0]:rect[2], :]

                # keep overlap with gt box IF center in sampled patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0

                # mask in gt boxes whose centers are below / right of the crop's top-left corner
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])

                # mask in gt boxes whose centers are above / left of the crop's bottom-right corner
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])

                # keep boxes where both m1 and m2 hold (center lies inside the crop)
                mask = m1 * m2

                # [0 ... 0 for num_gt and then 1 ... 1 for num_crowds]
                num_crowds = labels['num_crowds']
                crowd_mask = np.zeros(mask.shape, dtype=np.int32)

                if num_crowds > 0:
                    crowd_mask[-num_crowds:] = 1

                # have any valid boxes? try again if not
                # Also make sure you have at least one regular gt
                if not mask.any() or np.sum(1 - crowd_mask[mask]) == 0:
                    continue

                # take only the matching gt masks
                current_masks = masks[mask, :, :].copy()

                # take only matching gt boxes
                current_boxes = boxes[mask, :].copy()

                # take only matching gt labels
                labels['labels'] = labels['labels'][mask]
                current_labels = labels

                # We now might have fewer crowd annotations
                if num_crowds > 0:
                    labels['num_crowds'] = np.sum(crowd_mask[mask])

                # should we use the box left and top corner or the crop's
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
                                                  rect[:2])
                # adjust to crop (by subtracting crop's left,top)
                current_boxes[:, :2] -= rect[:2]

                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
                                                  rect[2:])
                # adjust to crop (by subtracting crop's left,top)
                current_boxes[:, 2:] -= rect[:2]

                # crop the current masks to the same dimensions as the image
                current_masks = current_masks[:, rect[1]:rect[3],
                                              rect[0]:rect[2]]

                return current_image, current_masks, current_boxes, current_labels
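# The m1/m2 masks above keep only ground-truth boxes whose centers fall inside
# the sampled crop; a small standalone illustration with made-up boxes and a
# made-up crop rectangle:
import numpy as np

toy_boxes = np.array([[10, 10, 50, 50],      # center (30, 30)
                      [100, 100, 180, 160],  # center (140, 130)
                      [60, 20, 80, 40]])     # center (70, 30)
toy_rect = np.array([40, 0, 200, 200])       # crop x1, y1, x2, y2

toy_centers = (toy_boxes[:, :2] + toy_boxes[:, 2:]) / 2.0
m1 = (toy_rect[0] < toy_centers[:, 0]) * (toy_rect[1] < toy_centers[:, 1])
m2 = (toy_rect[2] > toy_centers[:, 0]) * (toy_rect[3] > toy_centers[:, 1])
print(m1 * m2)  # [False  True  True]: only boxes centered inside the crop survive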
示例#44
0
def main(_):
    sns.set()
    sns.set_palette(sns.color_palette('hls', 10))
    npr.seed(FLAGS.seed)

    logging.info('Starting experiment.')

    # Create model folder for outputs
    try:
        gfile.MakeDirs(FLAGS.work_dir)
    except gfile.GOSError:
        pass
    stdout_log = gfile.Open('{}/stdout.log'.format(FLAGS.work_dir), 'w+')

    # use mean/std of svhn train
    train_images, _, _ = datasets.get_dataset_split(
        name=FLAGS.train_split.split('-')[0],
        split=FLAGS.train_split.split('-')[1],
        shuffle=False)
    train_mu, train_std = onp.mean(train_images), onp.std(train_images)
    del train_images

    # BEGIN: fetch test data and candidate pool
    test_images, test_labels, _ = datasets.get_dataset_split(
        name=FLAGS.test_split.split('-')[0],
        split=FLAGS.test_split.split('-')[1],
        shuffle=False)
    pool_images, pool_labels, _ = datasets.get_dataset_split(
        name=FLAGS.pool_split.split('-')[0],
        split=FLAGS.pool_split.split('-')[1],
        shuffle=False)

    n_pool = len(pool_images)
    test_images = (test_images -
                   train_mu) / train_std  # normalize w train mu/std
    pool_images = (pool_images -
                   train_mu) / train_std  # normalize w train mu/std

    # augmentation for train/pool data
    if FLAGS.augment_data:
        augmentation = data.chain_transforms(data.RandomHorizontalFlip(0.5),
                                             data.RandomCrop(4), data.ToDevice)
    else:
        augmentation = None
    # END: fetch test data and candidate pool

    # BEGIN: load ckpt
    opt_init, opt_update, get_params = optimizers.sgd(FLAGS.learning_rate)

    if FLAGS.pretrained_dir is not None:
        with gfile.Open(FLAGS.pretrained_dir, 'rb') as fpre:
            pretrained_opt_state = optimizers.pack_optimizer_state(
                pickle.load(fpre))
        fixed_params = get_params(pretrained_opt_state)[:7]

        ckpt_dir = '{}/{}'.format(FLAGS.root_dir, FLAGS.ckpt_idx)
        with gfile.Open(ckpt_dir, 'wr') as fckpt:
            opt_state = optimizers.pack_optimizer_state(pickle.load(fckpt))
        params = get_params(opt_state)
        # combine fixed pretrained params and dpsgd trained last layers
        params = fixed_params + params
        opt_state = opt_init(params)
    else:
        ckpt_dir = '{}/{}'.format(FLAGS.root_dir, FLAGS.ckpt_idx)
        with gfile.Open(ckpt_dir, 'wr') as fckpt:
            opt_state = optimizers.pack_optimizer_state(pickle.load(fckpt))
        params = get_params(opt_state)

    stdout_log.write('finetune from: {}\n'.format(ckpt_dir))
    logging.info('finetune from: %s', ckpt_dir)
    test_acc, test_pred = accuracy(params,
                                   shape_as_image(test_images, test_labels),
                                   return_predicted_class=True)
    logging.info('test accuracy: %.2f', test_acc)
    stdout_log.write('test accuracy: {}\n'.format(test_acc))
    stdout_log.flush()
    # END: load ckpt

    # BEGIN: setup for dp model
    @jit
    def update(_, i, opt_state, batch):
        params = get_params(opt_state)
        return opt_update(i, grad_loss(params, batch), opt_state)

    @jit
    def private_update(rng, i, opt_state, batch):
        params = get_params(opt_state)
        rng = random.fold_in(rng, i)  # get new key for new random numbers
        return opt_update(
            i,
            private_grad(params, batch, rng, FLAGS.l2_norm_clip,
                         FLAGS.noise_multiplier, FLAGS.batch_size), opt_state)

    # END: setup for dp model

    n_uncertain = FLAGS.n_extra + FLAGS.uncertain_extra

    ### BEGIN: prepare extra points picked from pool data
    # BEGIN: on pool data
    pool_embeddings = [apply_fn_0(params[:-1],
                                  pool_images[b_i:b_i + FLAGS.batch_size]) \
                       for b_i in range(0, n_pool, FLAGS.batch_size)]
    pool_embeddings = np.concatenate(pool_embeddings, axis=0)

    pool_logits = apply_fn_1(params[-1:], pool_embeddings)

    pool_true_labels = np.argmax(pool_labels, axis=1)
    pool_predicted_labels = np.argmax(pool_logits, axis=1)
    pool_correct_indices = \
        onp.where(pool_true_labels == pool_predicted_labels)[0]
    pool_incorrect_indices = \
        onp.where(pool_true_labels != pool_predicted_labels)[0]
    assert len(pool_correct_indices) + \
        len(pool_incorrect_indices) == len(pool_labels)

    pool_probs = stax.softmax(pool_logits)

    if FLAGS.uncertain == 0 or FLAGS.uncertain == 'entropy':
        pool_entropy = -onp.sum(pool_probs * onp.log(pool_probs), axis=1)
        stdout_log.write('all {} entropy: min {}, max {}\n'.format(
            len(pool_entropy), onp.min(pool_entropy), onp.max(pool_entropy)))

        pool_entropy_sorted_indices = onp.argsort(pool_entropy)
        # take the n_uncertain most uncertain points
        pool_uncertain_indices = \
            pool_entropy_sorted_indices[::-1][:n_uncertain]
        stdout_log.write('uncertain {} entropy: min {}, max {}\n'.format(
            len(pool_entropy[pool_uncertain_indices]),
            onp.min(pool_entropy[pool_uncertain_indices]),
            onp.max(pool_entropy[pool_uncertain_indices])))

    elif FLAGS.uncertain == 1 or FLAGS.uncertain == 'difference':
        # 1st_prob - 2nd_prob
        assert len(pool_probs.shape) == 2
        sorted_pool_probs = onp.sort(pool_probs, axis=1)
        pool_probs_diff = sorted_pool_probs[:, -1] - sorted_pool_probs[:, -2]
        assert min(pool_probs_diff) > 0.
        pool_uncertain_indices = onp.argsort(pool_probs_diff)[:n_uncertain]

    # END: on pool data

    # BEGIN: cluster uncertain pool points
    big_pca = sklearn.decomposition.PCA(n_components=pool_embeddings.shape[1])
    big_pca.random_state = FLAGS.seed
    # fit PCA onto embeddings of all the pool points
    big_pca.fit(pool_embeddings)

    # For uncertain points, project embeddings onto the first K components
    pool_uncertain_projected_embeddings, _ = utils.project_embeddings(
        pool_embeddings[pool_uncertain_indices], big_pca, FLAGS.k_components)

    n_cluster = int(FLAGS.n_extra / FLAGS.ppc)
    cluster_method = get_cluster_method('{}_nc-{}'.format(
        FLAGS.clustering, n_cluster))
    cluster_method.random_state = FLAGS.seed
    pool_uncertain_cluster_labels = cluster_method.fit_predict(
        pool_uncertain_projected_embeddings)
    pool_uncertain_cluster_label_indices = {
        x: []
        for x in set(pool_uncertain_cluster_labels)
    }  # local i within n_uncertain
    for i, c_label in enumerate(pool_uncertain_cluster_labels):
        pool_uncertain_cluster_label_indices[c_label].append(i)

    # find center of each cluster
    # aka, the most representative point of each 'tough' cluster
    pool_picked_indices = []
    pool_uncertain_cluster_label_pick = {}
    for c_label, indices in pool_uncertain_cluster_label_indices.items():
        cluster_projected_embeddings = \
            pool_uncertain_projected_embeddings[indices]
        cluster_center = onp.mean(cluster_projected_embeddings,
                                  axis=0,
                                  keepdims=True)
        if FLAGS.distance == 0 or FLAGS.distance == 'euclidean':
            cluster_distances = euclidean_distances(
                cluster_projected_embeddings, cluster_center).reshape(-1)
        elif FLAGS.distance == 1 or FLAGS.distance == 'weighted_euclidean':
            cluster_distances = weighted_euclidean_distances(
                cluster_projected_embeddings, cluster_center,
                big_pca.singular_values_[:FLAGS.k_components])

        sorted_is = onp.argsort(cluster_distances)
        sorted_indices = onp.array(indices)[sorted_is]
        pool_uncertain_cluster_label_indices[c_label] = sorted_indices
        center_i = sorted_indices[0]  # center_i in 3000
        pool_uncertain_cluster_label_pick[c_label] = center_i
        pool_picked_indices.extend(
            pool_uncertain_indices[sorted_indices[:FLAGS.ppc]])

        # BEGIN: visualize cluster of picked pool
        if FLAGS.visualize:
            this_cluster = []
            for i in sorted_indices:
                idx = pool_uncertain_indices[i]
                img = utils.denormalize(pool_images[idx], train_mu, train_std)
                if idx in pool_correct_indices:
                    border_color = 'green'
                else:
                    border_color = 'red'
                    img = utils.mark_labels(img, pool_predicted_labels[idx],
                                            pool_true_labels[idx])
                img = utils.add_border(img, width=2, color=border_color)
                this_cluster.append(img)
            utils.tile_image_list(
                this_cluster, '{}/picked_uncertain_pool_cid-{}'.format(
                    FLAGS.work_dir, c_label))
        # END: visualize cluster of picked pool

    # END: cluster uncertain pool points

    pool_picked_indices = list(set(pool_picked_indices))

    n_gap = FLAGS.n_extra - len(pool_picked_indices)
    gap_indices = list(set(pool_uncertain_indices) - set(pool_picked_indices))
    pool_picked_indices.extend(npr.choice(gap_indices, n_gap, replace=False))
    stdout_log.write('n_gap: {}\n'.format(n_gap))
    ### END: prepare extra points picked from pool data

    finetune_images = copy.deepcopy(pool_images[pool_picked_indices])
    finetune_labels = copy.deepcopy(pool_labels[pool_picked_indices])

    stdout_log.write('{} points picked via {}\n'.format(
        len(finetune_images), FLAGS.uncertain))
    logging.info('%d points picked via %s', len(finetune_images),
                 FLAGS.uncertain)
    assert FLAGS.n_extra == len(finetune_images)
    # END: gather points to be used for finetuning

    stdout_log.write('Starting fine-tuning...\n')
    logging.info('Starting fine-tuning...')
    stdout_log.flush()

    for epoch in range(1, FLAGS.epochs + 1):

        # BEGIN: finetune model with extra data, evaluate and save
        num_extra = len(finetune_images)
        num_complete_batches, leftover = divmod(num_extra, FLAGS.batch_size)
        num_batches = num_complete_batches + bool(leftover)

        finetune = data.DataChunk(X=finetune_images,
                                  Y=finetune_labels,
                                  image_size=32,
                                  image_channels=3,
                                  label_dim=1,
                                  label_format='numeric')

        batches = data.minibatcher(finetune,
                                   FLAGS.batch_size,
                                   transform=augmentation)

        itercount = itertools.count()
        key = random.PRNGKey(FLAGS.seed)

        start_time = time.time()

        for _ in range(num_batches):
            # tmp_time = time.time()
            b = next(batches)
            if FLAGS.dpsgd:
                opt_state = private_update(
                    key, next(itercount), opt_state,
                    shape_as_image(b.X, b.Y, dummy_dim=True))
            else:
                opt_state = update(key, next(itercount), opt_state,
                                   shape_as_image(b.X, b.Y))
            # stdout_log.write('single update in {:.2f} sec\n'.format(
            #     time.time() - tmp_time))

        epoch_time = time.time() - start_time
        stdout_log.write('Epoch {} in {:.2f} sec\n'.format(epoch, epoch_time))
        logging.info('Epoch %d in %.2f sec', epoch, epoch_time)

        # accuracy on test data
        params = get_params(opt_state)

        test_pred_0 = test_pred
        test_acc, test_pred = accuracy(params,
                                       shape_as_image(test_images,
                                                      test_labels),
                                       return_predicted_class=True)
        test_loss = loss(params, shape_as_image(test_images, test_labels))
        stdout_log.write(
            'Eval set loss, accuracy (%): ({:.2f}, {:.2f})\n'.format(
                test_loss, 100 * test_acc))
        logging.info('Eval set loss, accuracy: (%.2f, %.2f)', test_loss,
                     100 * test_acc)
        stdout_log.flush()

        # visualize prediction difference between 2 checkpoints.
        if FLAGS.visualize:
            utils.visualize_ckpt_difference(test_images,
                                            np.argmax(test_labels, axis=1),
                                            test_pred_0,
                                            test_pred,
                                            epoch - 1,
                                            epoch,
                                            FLAGS.work_dir,
                                            mu=train_mu,
                                            sigma=train_std)

    # END: finetune model with extra data, evaluate and save

    stdout_log.close()
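# The two uncertainty scores used above (FLAGS.uncertain 0/'entropy' and
# 1/'difference') reduce to simple operations on the softmax probability
# matrix; a minimal sketch on a toy 3-class matrix with invented values:
import numpy as np

toy_probs = np.array([[0.34, 0.33, 0.33],   # nearly uniform: high entropy, tiny margin
                      [0.90, 0.05, 0.05],   # confident: low entropy, large margin
                      [0.50, 0.45, 0.05]])  # two competing classes: small margin

# 'entropy': larger means more uncertain
toy_entropy = -np.sum(toy_probs * np.log(toy_probs), axis=1)

# 'difference': gap between the two largest probabilities, smaller means more uncertain
toy_sorted = np.sort(toy_probs, axis=1)
toy_margin = toy_sorted[:, -1] - toy_sorted[:, -2]

print(np.argsort(toy_entropy)[::-1])  # most-uncertain-first by entropy -> [0 2 1]
print(np.argsort(toy_margin))         # most-uncertain-first by margin  -> [0 2 1]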
示例#45
0
         from_)
     altInpMsg, update_id, from_ = getMessage(update_id, from_)
     altInpMsg = altInpMsg.strip().lower().split()
     if "yes" in altInpMsg or "yup" in altInpMsg:
         ansMatch = match(inputMsg, "data/final_concat.csv")
         bot.sendMessage(ansMatch, from_)
         altInpMsg = ''
     elif "no" in altInpMsg or "nope" in altInpMsg:
         bot.sendMessage("Your choice.. I was just trying to help.",
                         from_)
         altInpMsg = ''
     else:
         continue
 else:
     bot.sendMessage(ans, from_)
     randAltAns = choice(list(altans.keys()), size=2, replace=False)
     if len(altans) != 0:
         QUES = "Do you wish to know more about "
         # alternate answers
         bot.sendMessage(QUES + ", ".join(randAltAns) + "?", from_)
         altInpMsg, update_id, from_ = getMessage(update_id, from_)
         altInpMsg = altInpMsg.strip().lower().split()
         if ("yes" in altInpMsg):
             string = ""
             if len(altans1) != 0:
                 for w in randAltAns:
                     if w in altInpMsg:
                         string += altans[w] + "\n"
                 if string == "":
                     for w in randAltAns:
                         string += altans[w] + "\n"
示例#46
0
File: meta.py  Project: 314pe/Axelrod
 def meta_strategy(self, results, opponent):
     """Using the numpy.random choice function to sample with weights"""
     return choice(results, p=self.distribution)
示例#47
0
import pandas as pd
from scipy.spatial.distance import pdist,squareform
from scipy import exp
from scipy.linalg import eigh

Dataset_originall=pd.read_csv('../parkinson.csv')  # read the file
Dataset=Dataset_originall.values  # put the DataFrame object into a NumPy array

m=5  # number of dimensions to take for the feature-space projection
sigma=10  # variance of the kernel function
beta=0.5  # parameter for the Gaussian regression

# of the 42 subjects in this data, use 30 for training and 12 for testing
from numpy import random
Domain_number=30
training_person=random.choice(list(range(1,43)),Domain_number,replace=False)  # draw without replacement
training_person.sort()  # sorted order is easier to follow when feeding the Gaussian process
# first, store each subject's data separately
#
#
#

Training_Dataset=[]  # the training subjects' data is collected here
for i in training_person:
    X=Dataset_originall[Dataset_originall['subject#']==i]  # pull out all rows for subject i as a DataFrame
    X1=[X.iloc[:,4].values,X.iloc[:,5].values,X.iloc[:,6:].values]  # extract the labels y and the features X from that DataFrame
    Training_Dataset.append(X1)
# the result is a list: [[subject 1's data], [subject 2's data], ...]
# [subject 1's data] = [array of 149 y values, array of 149 y2 values, array of X values with shape (149, 16)]

# the test data is stored in the same way
示例#48
0
from numpy import nan
from pandas import array
xs = array([1, 2, 3])
print(f'{xs * 2 = }')
xs = array([1, 2, 3, nan, nan])
print(f'{xs * 2 = }')

from pandas import Series
s = Series([1, 2, 3], index=[2, 1, 0])
print(s)
print(f'{s[0] = }')
print(f'{s[0:1] = }')
print(f'{s.loc[0] = }')
print(f'{s.iloc[0] = }')

from pandas import DataFrame
from numpy.random import choice, normal
from string import ascii_lowercase
df = DataFrame({
    'ticker': choice([*ascii_lowercase], size=((size:=10), 4)).view('<U4').ravel(),
    'price': normal(size=size)
})
df = df.set_index('ticker')
print(df)
print(df.index)
print(df.columns)
print(df._data)
print(df.stack())
print(df.unstack())
print(df.melt())
示例#49
0
 def sample(self, X, Y, params):
     sample_size = int(self.minibatch_frac * len(Y))
     idxs = np_rnd.choice(np.arange(len(Y)), sample_size, replace=False)
     return idxs, X[idxs, :], Y[idxs], params[idxs, :]
示例#50
0
def makeComboPics(vcf_list,
                  sample_list,
                  outdir,
                  bam_dir,
                  samplot_directory,
                  bcftools_executable,
                  num_pics,
                  num_samps,
                  ref_id,
                  length_threshold=100000):
    for i in vcf_list:
        if os.path.exists(i[1]):
            if i[1].endswith("vcf"):
                suf = i[0]
                vcf = VCF(i[1])
                vcf_dir = i[1].split("/")[-1].replace(".vcf", "_combos")
                # pdb.set_trace()
                Outdir = f"{outdir}/{vcf_dir}"
                if not os.path.exists(Outdir): os.mkdir(Outdir)
                if sample_list == "-9": samps = vcf.samples
                else: samps = sample_list.split(",")
                for variant in vcf:
                    svtype = variant.INFO.get('SVTYPE')
                    if svtype == "CNV":
                        svtype = variant.INFO.get('GSCNCATEGORY')
                        svLen = variant.INFO.get('GSELENGTH')
                        if svtype == "None":
                            print(
                                "Change Type to String for GSCNCATEGORY in VCF header"
                            )
                        genos = variant.format('CN').tolist()
                        genos = [x[0] for x in genos]
                        if variant.format('FT') is not None:
                            filts = [
                                j for j, x in enumerate(variant.format('FT'))
                                if x != "PASS"
                            ]
                        else:
                            filts = []
                        if samps.index(ref_id) in filts: continue
                        else:
                            ref_allele = genos[samps.index(ref_id)]
                            genos = [
                                0 if x == ref_allele else 3 for x in genos
                            ]
                            genos = [
                                -9 if j in filts else x
                                for j, x in enumerate(genos)
                            ]
                    else:
                        svLen = variant.INFO.get('SVLEN')
                        genos = variant.gt_types
                    if svLen < length_threshold:
                        alts = [j for j, x in enumerate(genos) if x == 3]
                        refs = [j for j, x in enumerate(genos) if x == 0]
                        if len(alts) > num_samps and len(
                                refs
                        ) > num_samps:  #CHANGE NEEDED HERE TO ALLOW FOR 3 AND 3 OR X AND X ALT/REF SAMPS
                            for k in range(0, num_pics):
                                Samps = [
                                    samps[ii] for ii in random.choice(
                                        alts, num_samps, replace=False)
                                ] + [
                                    samps[ii] for ii in random.choice(
                                        refs, num_samps, replace=False)
                                ]
                                # alt = [samps[i] for i in random.choice(alts, num_samps, replace=False)]
                                # ref = [samps[i] for i in random.choice(refs, num_samps, replace=False)]
                                Bams = [f"{bam_dir}/{x}{suf}" for x in Samps]
                                png_file = f"{svtype}_{variant.CHROM}_{variant.start}_{variant.end}.png"
                                cmd = f"{samplot_directory}/samplot.py -n {','.join(Samps)} -b {','.join(Bams)} -o {Outdir}/{png_file} -s {variant.start} -e {variant.end} -c {variant.CHROM} -a -t {svtype}"
                                print(cmd)
            elif i[1].endswith("gz"):
                print("unzip vcf file: ", i[1])
        else:
            print(i[1], "does not exist")
    return ()
示例#51
0
def IBPFM(X,
          iteration,
          burnin=0,
          design=None,
          stdData=False,
          initZA=None,
          initSB=None,
          initK=None,
          initF=None,
          proposeK=True,
          updateZA=True,
          updateSB=True,
          updateF=True,
          nonGaussianF=False,
          updateIBPa_fm=True,
          updateIBPa_reg=True,
          updatePrec_x=True,
          updatePrec_a=True,
          updatePrec_b=True,
          prec_x_iso=False,
          learn_scale_xb=True,
          prec_a_iso=False,
          learn_scale_ab=True,
          prec_b_iso=False,
          learn_scale_bb=True,
          prec_x=None,
          prec_xa=1,
          prec_xb=1,
          prec_xb_a=1,
          prec_xb_b=1,
          prec_a=None,
          prec_aa=1,
          prec_ab=1,
          prec_ab_a=1,
          prec_ab_b=1,
          prec_b=None,
          prec_ba=1,
          prec_bb=1,
          prec_bb_a=1,
          prec_bb_b=1,
          fmIBPa=None,
          fmIBPa_a=1,
          fmIBPa_b=1,
          regIBPa=None,
          regIBPa_a=1,
          regIBPa_b=1,
          DPa=None,
          DPa_a=1,
          DPa_b=1,
          saveIteration=False,
          printIteration=100):
    """Factor model with IBP prior using Gibbs sampler."""
    # Model: X = BH + (Z o A)F + noise
    # @X: (D x N) data matrix
    # @B: (D x P) regression coefficient for observed covariates
    # @H: (P x N) design matrix for observed covariates
    # @Z: (D x K) binary factor assignment matrix
    # @A: (D x K) factor loading matrix
    # @noise: (D x N) residuals

    # @iteration: # of simulation
    # @data: (D x N) data matrix
    # @design: (D x P) design matrix for covariates

    # OPTIONAL ARGUMENTS
    # @stdData: standardize data if necessary
    # @initZA: initial state of (Z o A) matrix. override initK if it's not None
    # @initK: initial number of features
    # @initF: initial state of F matrix
    # @proposeK: enable non-parametric approach for feature count K
    # @updateZA: update matrix Z and A
    # @updateF: update matrix F
    # @nonGaussianF: use Dirichlet process for F
    # @updatePrec_x: update inverse of residual variance
    # @updatePrec_a: update inverse of factor loading variance
    # @prec_x_iso: use isotropic residual variance?
    # @prec_a_iso: use isotropic factor loading variance?
    # @updateIBPa: update IBP parameter
    # @prec_x: initial state of prec_x (scalar)
    # @prec_xa: Gamma shape parameter for P(prec_x)
    # @prec_xb: Gamma rate parameter for P(prec_x)
    # @prec_xb_a: Gamma shape parameter for P(prec_xb)
    # @prec_xb_b: Gamma rate parameter for P(prec_xb)
    # @prec_a: initial state of prec_a (scalar)
    # @prec_aa: Gamma shape parameter for P(prec_a)
    # @prec_ab: Gamma rate parameter for P(prec_a)
    # @prec_ab_a: Gamma shape parameter for P(prec_ab)
    # @prec_ab_b: Gamma rate parameter for P(prec_ab)
    # @fmIBPa: IBP alpha Parameter for factor model part
    # @fmIBPa_a: Gamma shape parameter for P(fmIBPa)
    # @fmIBPa_b: Gamma rate parameter for P(fmIBPa)
    # @regIBPa: IBP alpha Parameter for regression part
    # @regIBPa_a: Gamma shape parameter for P(regIBPa)
    # @regIBPa_b: Gamma rate parameter for P(regIBPa)
    # @DPa: concentration parameter for Dirichlet process
    # @DPa_a: Gamma shape parameter for P(DPa)
    # @DPa_b: Gamma rate parameter for P(DPa)
    # @saveIteration: save output for each iteration as file

    D, N = X.shape

    # Create a matrix with missing indicators
    Xmask = np.isnan(X).astype(np.int)

    if stdData:
        X = (X - np.nanmean(X, axis=1, keepdims=True)) /\
            np.nanstd(X, axis=1, keepdims=True)

    # Initialize noise variance from Gamma prior
    if prec_x is None:
        prec_x = np.ones(D) * nr.gamma(prec_xa, 1. / prec_xb)
    else:
        prec_x = np.ones(D) * prec_x

    # Initialize IBP parameter alpha from Gamma prior
    if fmIBPa is None:
        fmIBPa = nr.gamma(fmIBPa_a, fmIBPa_b)
    if design is not None and regIBPa is None:
        regIBPa = nr.gamma(regIBPa_a, regIBPa_b)

    if initZA is None:
        # Generate binary feature assignment matrix Z
        if initK is not None:
            K = initK
            Z = nr.binomial(1, 0.5, (D, K))
        else:
            Z = simulateIBP(regIBPa, D)
            Z = Z.astype(np.int)
            K = Z.shape[1]

        # Initialize feature loading variance from Gamma prior
        if prec_a is None:
            prec_a = np.ones(K) * nr.gamma(prec_aa, 1. / prec_ab)
        else:
            prec_a = np.ones(K) * prec_a

        # Simulate feature loading matrix A based on N(A_dk | 0, sigma_a)
        A = np.copy(Z)
        A = A.astype(np.float)
        for (d, k) in zip(*A.nonzero()):
            A[d, k] = nr.normal(0, np.sqrt(1 / prec_a[k]))

    else:
        A = initZA.astype(np.float)
        Z = (A != 0).astype(np.int)

        K = Z.shape[1]

        # Initialize feature loading variance from Gamma prior
        if prec_a is None:
            prec_a = np.ones(K) * nr.gamma(prec_aa, 1. / prec_ab)
        else:
            prec_a = np.ones(K) * prec_a

    # Check A is float type and Z is integer type
    assert (np.issubsctype(A, np.float) and np.issubsctype(Z, np.int))

    # Initialize feature score matrix F
    if initF is None:
        if nonGaussianF:
            from utils.sampleF_DP import sampleF
            if DPa is None:
                # Draw DP concentration parameter
                DPa = nr.gamma(DPa_a, 1. / DPa_b)
            while True:
                # Redraw DP concentration parameter until E(# of cluster) > 0
                J = int(DPa * np.log(N))
                if (J > 0):
                    break
                else:
                    DPa = nr.gamma(DPa_a, 1. / DPa_b)
            # Initialize cluster indicators for samples
            clus_ind = nr.choice(range(J), size=N)
            # Initialize unique factor vectors
            clus_theta = nr.normal(0, 1, (K, J))
            F = np.empty((K, N)) * np.nan
            for i in range(N):
                F[:, i] = clus_theta[:, clus_ind[i]]
        else:
            from utils.sampleF import sampleF
            F = nr.normal(0, 1, size=(K, N))
    else:
        F = initF.astype(np.float)
        assert (initF.shape == (K, N))
        if nonGaussianF:
            from utils.sampleF_DP import sampleF
            if (DPa is None):
                # Draw DP concentration parameter
                DPa = nr.gamma(DPa_a, 1. / DPa_b)

            # Extract unique score vectors and class indicators
            clus_theta = np.vstack({tuple(row) for row in F.T}).T
            assert (clus_theta.shape[0] == K)
            J = clus_theta.shape[1]
            clus_ind = np.array([], dtype=np.int)
            for i in range(N):
                label = np.where((F[:, [i]] == clus_theta).all(axis=0))
                clus_ind = np.append(clus_ind, label)
            assert (len(clus_ind) == N)
        else:
            from utils.sampleF import sampleF

    # Impute missing values if they exist
    if np.sum(Xmask) > 0:
        Xtemp = np.dot(A, F)
        X[Xmask == 1] = Xtemp[Xmask == 1]
        nonMissing = X[Xmask == 0]

    # Regression components for design and control variables
    if design is not None:
        H = design
        P = H.shape[0]
        assert (H.shape[1] == N)

        if initSB is None:
            # Generate binary feature assignment matrix Z
            intercept = np.ones(D).reshape(D, 1)
            S1 = nr.binomial(1, 0.5, (D, P - 1))
            S = np.hstack((intercept, S1)).astype(np.int)

            # Initialize feature loading variance from Gamma prior
            if prec_b is None:
                prec_b = np.ones(P) * nr.gamma(prec_ba, 1. / prec_bb)
            else:
                prec_b = np.ones(P) * prec_b

            # Simulate coefficient matrix B based on N(B_dk | 0, sigma_b)
            B = np.copy(S)
            B = S.astype(np.float)
            mean = np.nanmean(X, axis=1)
            for (d, p) in zip(*B.nonzero()):
                if p == 0:
                    # Start intercept with variable mean
                    B[d, p] = mean[d]
                else:
                    B[d, p] = nr.normal(0, np.sqrt(1 / prec_b[p]))
        else:
            B = initZA.astype(np.float)
            S = (B != 0).astype(np.int)
            assert (B.shape == (D, P))

            # Initialize feature loading variance from Gamma prior
            if prec_b is None:
                prec_b = np.ones(P) * nr.gamma(prec_ba, 1. / prec_bb)
            else:
                prec_b = np.ones(P) * prec_b

        # Check B is float type and S is integer type
        assert (np.issubsctype(B, np.float) and np.issubsctype(S, np.int))

    for s in range(iteration):
        # Save initial parameters
        if (s == 0):
            K_save = K
            fmIBPa_save = fmIBPa
            if design is not None:
                regIBPa_save = regIBPa
            psi_x_save = 1. / prec_x
            if nonGaussianF:
                DPa_save = DPa

            if design is not None:
                loglikelihood = logLik(X, F, A, prec_x, N, D, prec_x_iso, H, B)
            else:
                loglikelihood = logLik(X,
                                       F,
                                       A,
                                       prec_x,
                                       N,
                                       D,
                                       prec_x_iso,
                                       H=None,
                                       B=None)

            logLik_save = loglikelihood

            if proposeK is False:
                tau_a_save = 1. / prec_a
                Z_sum = np.zeros((D, K))
                A_sum = np.zeros((D, K))
                F_sum = np.zeros((K, N))
                if design is not None:
                    S_sum = np.zeros((D, P))
                    B_sum = np.zeros((D, P))

            time = datetime.datetime.now()
            print("=========================================================")
            print("Started at " + str(time))
            print("Data shape: observations = %d\t variables = %d" % (N, D))
            print("K = %d\tIBP_alpha = %.3f" % (K, fmIBPa))
            print("=========================================================")

        # Update coefficient matrix
        if design is not None:
            X_reg = X - np.dot(A, F)
            if updateSB:
                (S, B) = sampleSB(X_reg, H, S, B, prec_x, prec_b, regIBPa, P,
                                  N, D, prec_ba, prec_bb, prec_b_iso)
            X_fm = X - np.dot(B, H)

            # Update IBP parameter for regression part
            if updateIBPa_reg:
                regIBPa = sampleIBPa(regIBPa_a, regIBPa_b, P, D)

            # Update coefficient variance
            if updatePrec_b:
                if (learn_scale_bb and not prec_b_iso):
                    from utils.samplePrec_b import samplePrec_bb
                    prec_bb = samplePrec_bb(P, prec_b, prec_ba, prec_bb,
                                            prec_bb_a, prec_bb_b)

                prec_b = samplePrec_b(X_reg, S, B, P, prec_ba, prec_bb,
                                      prec_b_iso)
        else:
            X_fm = X

        # Update factor assignment matrix Z and factor loading matrix A
        if updateZA:
            (F, Z, A, K, prec_a) = sampleZA(X_fm, F, Z, A, prec_x, prec_a,
                                            fmIBPa, K, N, D, proposeK, prec_aa,
                                            prec_ab, prec_a_iso)

        # Update factor score matrix
        if updateF and nonGaussianF:
            (F, clus_ind, clus_theta, J,
             DPa) = sampleF(X_fm, F, A, prec_x, clus_ind, clus_theta, J, N, D,
                            K, DPa, DPa_a, DPa_b)

        if updateF and not nonGaussianF:
            F = sampleF(X_fm, A, prec_x, N, D, K)

        # Update factor loading variance
        if updatePrec_a:
            if (learn_scale_ab and not prec_a_iso):
                from utils.samplePrec_a import samplePrec_ab
                prec_ab = samplePrec_ab(K, prec_a, prec_aa, prec_ab, prec_ab_a,
                                        prec_ab_b)

            prec_a = samplePrec_a(X_fm, Z, A, K, prec_aa, prec_ab, prec_a_iso)

        # Update IBP parameter for factor model part
        if updateIBPa_fm:
            fmIBPa = sampleIBPa(fmIBPa_a, fmIBPa_b, K, D)

        # Update residual variance
        if updatePrec_x:
            if (learn_scale_xb and not prec_x_iso):
                from utils.samplePrec_x import samplePrec_xb
                prec_xb = samplePrec_xb(D, prec_x, prec_xa, prec_xb, prec_xb_a,
                                        prec_xb_b)

            if design is not None:
                residue = X - np.dot(B, H) - np.dot(A, F)
            else:
                residue = X - np.dot(A, F)
            prec_x = samplePrec_x(residue, N, D, prec_xa, prec_xb, prec_x_iso)

        # Update missing values based on posterior distribution
        if np.sum(Xmask > 0):
            # Predictive mean
            if design is not None:
                Xpred = np.dot(B, H) + np.dot(A, F)
            else:
                Xpred = np.dot(A, F)

            # Add noise
            covNoise = np.diag(1. / prec_x)
            noise = nr.multivariate_normal(np.zeros(D), covNoise, N).T
            Xpred += noise

            # Update missing values
            X[Xmask == 1] = Xpred[Xmask == 1]
            assert (all(nonMissing == X[Xmask == 0]))

        if design is not None:
            loglikelihood = logLik(X, F, A, prec_x, N, D, prec_x_iso, H, B)
        else:
            loglikelihood = logLik(X,
                                   F,
                                   A,
                                   prec_x,
                                   N,
                                   D,
                                   prec_x_iso,
                                   H=None,
                                   B=None)

        if (s + 1) % printIteration == 0:
            print("Iteration %d: K = %d\tIBP_alpha = %.3f\tlogLik= %.3f" %
                  ((s + 1), K, fmIBPa, loglikelihood))

        # Save parameters for each iteration
        K_save = np.append(K_save, K)
        fmIBPa_save = np.append(fmIBPa_save, fmIBPa)
        if design is not None:
            regIBPa_save = np.append(regIBPa_save, regIBPa)
        psi_x_save = np.vstack((psi_x_save, 1. / prec_x))
        if nonGaussianF:
            DPa_save = np.append(DPa_save, DPa)
        logLik_save = np.append(logLik_save, loglikelihood)

        if proposeK is False:
            tau_a_save = np.vstack((tau_a_save, 1. / prec_a))
            # Accumulate Z, A, F to calculate posterior mean
            if (s >= burnin):
                Z_sum = Z_sum + Z
                A_sum = A_sum + A
                F_sum = F_sum + F
                if design is not None:
                    S_sum = S_sum + S
                    B_sum = B_sum + B

        if saveIteration and s >= burnin:
            saveIter(s, Z, A, F, prec_x, prec_a)

    fmIBPa_mean = np.mean(fmIBPa_save[(burnin + 1):])
    psi_mean = psi_x_save[(burnin + 1):, :].mean(axis=0)
    np.savetxt("mIBPalpha_Fm.txt", np.array([fmIBPa_mean]), delimiter="\t")
    np.savetxt("mPsi.txt",
               psi_mean.reshape(1, psi_mean.shape[0]),
               delimiter="\t")

    np.savetxt("logLik.txt", logLik_save, delimiter="\t")

    if proposeK is False:
        NMCsample = iteration - burnin
        Z_mean = Z_sum.astype(np.float) / NMCsample
        A_mean = A_sum / NMCsample
        F_mean = F_sum / NMCsample
        tau_mean = tau_a_save[(burnin + 1):, :].mean(axis=0)
        np.savetxt("mZ.txt", Z_mean, delimiter="\t")
        np.savetxt("mA.txt", A_mean, delimiter="\t")
        np.savetxt("mF.txt", F_mean, delimiter="\t")
        np.savetxt("mTau.txt",
                   tau_mean.reshape(1, tau_mean.shape[0]),
                   delimiter="\t")
        if nonGaussianF:
            DPa_mean = np.mean(DPa_save[(burnin + 1):])
            np.savetxt("mDPalpha.txt", np.array([DPa_mean]), delimiter="\t")

    else:
        np.savetxt("K.txt", K_save, delimiter="\t")

    if design is not None:
        regIBPa_mean = np.mean(regIBPa_save[(burnin + 1):])
        np.savetxt("mRegIBPalpha_Reg.txt",
                   np.array([regIBPa_mean]),
                   delimiter="\t")

        NMCsample = iteration - burnin
        S_mean = S_sum / NMCsample
        B_mean = B_sum / NMCsample
        np.savetxt("mS.txt", S_mean, delimiter="\t")
        np.savetxt("mB.txt", B_mean, delimiter="\t")

    return
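# The model the sampler above targets, X = BH + (Z o A)F + noise, can be made
# concrete by simulating a tiny data set with the stated shapes. All dimensions
# and distributions below are invented for illustration, not IBPFM's defaults:
import numpy as np
import numpy.random as nr

D, N, K, P = 6, 40, 3, 2                  # variables, samples, factors, covariates

Z = nr.binomial(1, 0.5, (D, K))           # (D x K) binary factor assignment
A = nr.normal(0, 1, (D, K))               # (D x K) factor loadings
F = nr.normal(0, 1, (K, N))               # (K x N) factor scores
H = np.vstack([np.ones(N),
               nr.normal(0, 1, N)])       # (P x N) design: intercept + covariate
B = nr.normal(0, 1, (D, P))               # (D x P) regression coefficients
noise = nr.normal(0, 0.1, (D, N))

X = B @ H + (Z * A) @ F + noise           # X = BH + (Z o A)F + noise
print(X.shape)                            # (6, 40)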
示例#52
0
    def __call__(self, image, boxes=None, labels=None):
        height, width, _ = image.shape
        while True:
            # randomly choose a mode
            mode = random.choice(self.sample_options)
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # max trials (50)
            for _ in range(50):
                current_image = image

                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # aspect ratio constraint b/t .5 & 2
                if h / w != 1:
                    continue
                left = random.uniform(width - w)
                top = random.uniform(height - h)

                # convert to integer rect x1,y1,x2,y2
                rect = np.array(
                    [int(left),
                     int(top),
                     int(left + w),
                     int(top + h)])

                # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
                overlap = object_converage_numpy(boxes, rect)

                # is min and max overlap constraint satisfied? if not try again
                if overlap.max() < min_iou or overlap.min() > max_iou:
                    continue

                # cut the crop from the image
                current_image = current_image[rect[1]:rect[3],
                                              rect[0]:rect[2], :]

                # keep overlap with gt box IF center in sampled patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0

                # mask in gt boxes whose centers are below / right of the crop's top-left corner
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])

                # mask in gt boxes whose centers are above / left of the crop's bottom-right corner
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])

                # keep boxes where both m1 and m2 hold (center lies inside the crop)
                mask = m1 * m2

                # have any valid boxes? try again if not
                if not mask.any():
                    continue

                # take only matching gt boxes
                current_boxes = boxes[mask, :].copy()

                # take only matching gt labels
                current_labels = labels[mask]

                # clip the box's top-left corner to the crop's top-left
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
                                                  rect[:2])
                # adjust to crop (by subtracting crop's left, top)
                current_boxes[:, :2] -= rect[:2]

                # clip the box's bottom-right corner to the crop's bottom-right
                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
                                                  rect[2:])
                # adjust to crop (by subtracting crop's left, top)
                current_boxes[:, 2:] -= rect[:2]

                return current_image, current_boxes, current_labels
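The overlap helper object_converage_numpy is not included in this snippet. A plausible sketch follows, computing the fraction of each ground-truth box covered by the crop rectangle; whether the original helper uses this coverage measure or full jaccard (IoU) overlap is an assumption, and the name simply mirrors the call site above.

import numpy as np

def object_converage_numpy(boxes, rect):
    """Fraction of each gt box's area covered by the crop rect (a sketch only).

    boxes: (N, 4) array of [x1, y1, x2, y2]; rect: length-4 array.
    The real helper is not shown above and may compute full IoU instead.
    """
    # intersection rectangle of each box with the crop
    ix1 = np.maximum(boxes[:, 0], rect[0])
    iy1 = np.maximum(boxes[:, 1], rect[1])
    ix2 = np.minimum(boxes[:, 2], rect[2])
    iy2 = np.minimum(boxes[:, 3], rect[3])
    inter = np.clip(ix2 - ix1, 0, None) * np.clip(iy2 - iy1, 0, None)
    box_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return inter / np.maximum(box_area, 1e-12)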
示例#53
0
 def _get_any_hidden_neuron(self) -> HiddenNeuron:
     return choice(self.hidden_neurons)
示例#54
0
    def train(self):
        s_size = int(self.pop_size / self.pp)
        sf_pop = [self.create_solution() for _ in range(0, self.pop_size)]
        s_pop = [self.create_solution() for _ in range(0, s_size)]
        sf_gbest = self.get_global_best_solution(sf_pop, self.ID_FIT,
                                                 self.ID_MIN_PROB)
        s_gbest = self.get_global_best_solution(s_pop, self.ID_FIT,
                                                self.ID_MIN_PROB)

        for epoch in range(0, self.epoch):

            ## Calculate lamda_i using Eq.(7)
            ## Update the position of sailfish using Eq.(6)
            for i in range(0, self.pop_size):
                PD = 1 - len(sf_pop) / (len(sf_pop) + len(s_pop))
                lamda_i = 2 * uniform() * PD - PD
                sf_pop[i][self.ID_POS] = s_gbest[self.ID_POS] - lamda_i * (
                    uniform() *
                    (sf_gbest[self.ID_POS] + s_gbest[self.ID_POS]) / 2 -
                    sf_pop[i][self.ID_POS])

            ## Calculate AttackPower using Eq.(10)
            AP = self.A * (1 - 2 * (epoch + 1) * self.epxilon)
            if AP < 0.5:
                alpha = int(len(s_pop) * abs(AP))
                beta = int(self.problem_size * abs(AP))
                ### Randomly choose which sardines will have their positions updated
                list1 = choice(range(0, len(s_pop)), alpha)
                for i in range(0, len(s_pop)):
                    if i in list1:
                        #### Randomly choose which dimensions to update; numpy vectorisation removes the third loop
                        list2 = choice(range(0, self.problem_size),
                                       beta,
                                       replace=False)
                        s_pop[i][self.ID_POS][list2] = (
                            uniform(0, 1, self.problem_size) *
                            (sf_gbest[self.ID_POS] - s_pop[i][self.ID_POS] +
                             AP))[list2]
            else:
                ### Update the positions of all sardines using Eq.(9)
                for i in range(0, len(s_pop)):
                    s_pop[i][self.ID_POS] = uniform() * (
                        sf_gbest[self.ID_POS] - s_pop[i][self.ID_POS] + AP)

            ## Recalculate the fitness of every sardine
            for i in range(0, len(s_pop)):
                s_pop[i][self.ID_FIT] = self.get_fitness_position(
                    s_pop[i][self.ID_POS], self.ID_MIN_PROB)

            ## Sort the sailfish and sardine populations by fitness (to reduce computational cost)
            sf_pop = sorted(sf_pop, key=lambda temp: temp[self.ID_FIT])
            s_pop = sorted(s_pop, key=lambda temp: temp[self.ID_FIT])
            for i in range(0, self.pop_size):
                for j in range(0, len(s_pop)):
                    ### If there is a better position in the sardine population, take it.
                    if sf_pop[i][self.ID_FIT] > s_pop[j][self.ID_FIT]:
                        sf_pop[i] = deepcopy(s_pop[j])
                        del s_pop[j]
                    break  #### Both populations are sorted by fitness, so only the best remaining
                           #### sardine needs to be compared; this skips a ton of comparisons,
                           #### especially when the sardine pop size >> sailfish pop size.

            s_pop = s_pop + [
                self.create_solution() for _ in range(0, s_size - len(s_pop))
            ]

            sf_gbest = self.update_global_best_solution(
                sf_pop, self.ID_MIN_PROB, sf_gbest)
            s_gbest = self.update_global_best_solution(s_pop, self.ID_MIN_PROB,
                                                       s_gbest)

            self.loss_train.append(sf_gbest[self.ID_FIT])
            if self.verbose:
                print("> Epoch: {}, Best fit: {}".format(
                    epoch + 1, sf_gbest[self.ID_FIT]))
        self.solution = sf_gbest
        return sf_gbest[self.ID_POS], sf_gbest[self.ID_FIT], self.loss_train
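The attack power AP from Eq.(10) decays linearly with the epoch and decides how many sardines and dimensions get refreshed. A standalone sketch of that schedule; the A and epxilon values here are illustrative, not the optimiser's own defaults:

def attack_power(epoch, A=4.0, epxilon=0.001):
    # AP = A * (1 - 2 * (epoch + 1) * epxilon), as in the train loop above;
    # A and epxilon are illustrative values, not the class defaults.
    return A * (1 - 2 * (epoch + 1) * epxilon)

n_sardines, n_dims = 50, 30
for epoch in (0, 200, 450):
    AP = attack_power(epoch)
    if AP < 0.5:
        alpha = int(n_sardines * abs(AP))  # sardines whose positions get refreshed
        beta = int(n_dims * abs(AP))       # dimensions refreshed per chosen sardine
        print(f"epoch {epoch}: AP={AP:.3f} -> update {alpha} sardines on {beta} dims")
    else:
        print(f"epoch {epoch}: AP={AP:.3f} -> update every sardine on all dims")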
示例#55
0
 def _get_any_axon(self) -> Axon:
     any_neuron = self._get_any_non_input_neuron()
     if any_neuron.incoming_axons:
         return choice(any_neuron.incoming_axons)
     return None
示例#56
0
 def _get_any_non_output_neuron(self) -> Neuron:
     return choice(self.input_neurons + self.hidden_neurons)
示例#57
0
def run_simulation(repeat):

    infection_reporting_prob = npr.choice([0.1, 0.2, 0.3, 0.4, 0.5])

    haz_rate_scale = pairs_dict[infection_reporting_prob]

    contact_tracing_success_prob = npr.uniform(0.7, 0.95)

    contact_trace_delay_par = npr.uniform(1.5, 2.5)

    reduce_contacts_by = npr.uniform(0.0, 0.9)

    # Scenario A
    # reduce_contacts_by=(0.68, 0.83, 0.83, 0.821, 0.846, 0.836)
    # Scenario B
    # reduce_contacts_by=(0.638, 0.786, 0.76, 0.733, 0.765, 0.755)
    # Scenario C
    # reduce_contacts_by=(0.628, 0.76, 0.685, 0.632, 0.668, 0.668)
    #Scenario D
    # reduce_contacts_by=(0.561, 0.698, 0.61, 0.543, 0.589, 0.577)
    # Scenario E
    # reduce_contacts_by = (0.413, 0.544, 0.393, 0.278, 0.348, 0.315)

    #do_2_step = npr.choice([True, False])

    prob_has_trace_app = npr.uniform(0, 0.5)

    backwards_trace = True

    probable_infections_need_test = False

    backwards_tracing_time_limit = npr.choice(list(range(7, 22)))

    simulation = hct.uk_model(haz_rate_scale=haz_rate_scale,
                            household_haz_rate_scale=0.77729,
                            contact_tracing_success_prob=contact_tracing_success_prob,
                            contact_trace_delay_par=contact_trace_delay_par,
                            overdispersion=0.36,
                            infection_reporting_prob=infection_reporting_prob,
                            contact_trace=True,
                            reduce_contacts_by=reduce_contacts_by,
                            test_before_propagate_tracing=False,
                            probable_infections_need_test=probable_infections_need_test,
                            backwards_trace=backwards_trace,
                            backwards_tracing_time_limit=float('inf'),
                            number_of_days_to_trace_forwards=float('inf'),
                            number_of_days_to_trace_backwards=float('inf'),
                            prob_has_trace_app=prob_has_trace_app,
                            starting_infections=starting_infections)

    simulation.run_simulation(days_to_simulate)

    parameters = [
        haz_rate_scale,
        infection_reporting_prob,
        contact_tracing_success_prob,
        contact_trace_delay_par,
        reduce_contacts_by,
        prob_has_trace_app,
        backwards_trace,
        probable_infections_need_test
    ]
    return parameters + simulation.inf_counts
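Each call to run_simulation draws its own parameters, so repeats can run independently. One way to farm them out and tabulate the results is sketched below; it assumes the module-level names used above (hct, pairs_dict, starting_infections, days_to_simulate) are defined and that pandas is available.

import multiprocessing as mp

import pandas as pd

def run_many(n_repeats=100, n_workers=4):
    # Run the simulation repeatedly in parallel; each call re-draws its parameters.
    with mp.Pool(n_workers) as pool:
        rows = pool.map(run_simulation, range(n_repeats))

    param_cols = [
        "haz_rate_scale", "infection_reporting_prob",
        "contact_tracing_success_prob", "contact_trace_delay_par",
        "reduce_contacts_by", "prob_has_trace_app",
        "backwards_trace", "probable_infections_need_test",
    ]
    # Everything after the parameters is the daily infection count series.
    day_cols = [f"day_{d}" for d in range(len(rows[0]) - len(param_cols))]
    return pd.DataFrame(rows, columns=param_cols + day_cols)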
示例#58
0
 def _get_any_neuron(self) -> Neuron:
     return choice(self.get_all_neurons())
示例#59
0
 def add_places(self, places, p):
     draw = choice(np.arange(len(places)), 200, p=p)
     self.places = [places[i] for i in draw]
示例#60
0
def _sample_graph(roidb, num_fg_rois, num_rois, rels_per_image):
    """
    Sample a graph from the foreground rois of an image
    :param:
    roidb: roidb of an image
    rois_per_image: maximum number of rois per image

    :return:
    roi_inds: 1d-array, the indexes of rois that are considered in the sampled graph.
                fg:bg ~ 1:3, fg may less than num_fg_rois(32)
    rels: (N, 3)-array for (sub, obj, rel), N is not certain. negative rel is no more than num_neg_rels
    """

    gt_rels = roidb['gt_relations']
    # index of assigned gt box for foreground boxes
    fg_gt_ind_assignments = roidb['fg_gt_ind_assignments']

    # find all fg proposals that are mapped to a gt
    gt_to_fg_roi_inds = {}
    all_fg_roi_inds = []
    for ind, gt_ind in fg_gt_ind_assignments.items():
        if gt_ind not in gt_to_fg_roi_inds:
            gt_to_fg_roi_inds[gt_ind] = []
        gt_to_fg_roi_inds[gt_ind].append(ind)
        all_fg_roi_inds.append(ind)

    # print('gt rois = %i' % np.where(roidb['max_overlaps']==1)[0].shape[0])
    # print('assigned gt = %i' % len(gt_to_fg_roi_inds.keys()))
    # dedup the roi inds
    all_fg_roi_inds = np.array(list(set(all_fg_roi_inds)))

    # find all valid relations in fg objects
    pos_rels = []
    for rel in gt_rels:
        for sub_i in gt_to_fg_roi_inds[rel[0]]:
            for obj_i in gt_to_fg_roi_inds[rel[1]]:
                pos_rels.append([sub_i, obj_i, rel[2]])

    # print('num fg rois = %i' % all_fg_roi_inds.shape[0])

    rels = []
    rels_inds = []
    roi_inds = []
    pos_inds = []  # (sub, obj) pairs of the de-duplicated positive relations

    if len(pos_rels) > 0:
        # de-duplicate the relations
        _, indices = np.unique(["{} {}".format(i, j) for i, j, k in pos_rels], return_index=True)
        pos_rels = np.array(pos_rels)[indices, :]
        pos_inds = pos_rels[:, :2].tolist()
        #print('num pos rels = %i' % pos_rels.shape[0])

        # construct graph based on valid relations
        for rel in pos_rels:
            roi_inds += rel[:2].tolist()
            roi_inds = list(set(roi_inds)) # keep roi inds unique
            rels.append(rel)
            rels_inds.append(rel[:2].tolist())

            if len(roi_inds) >= num_fg_rois or len(rels_inds) >= rels_per_image:  # this usually limits the number of positive rels
                break

    #print('sampled rels = %i' % len(rels))

    roi_candidates = np.setdiff1d(all_fg_roi_inds, roi_inds)
    num_rois_to_sample = min(num_fg_rois - len(roi_inds), len(roi_candidates))
    # if not enough rois, sample fg rois
    if num_rois_to_sample > 0:
        roi_sample = npr.choice(roi_candidates.astype(np.int32), size=num_rois_to_sample,
                                replace=False)
        roi_inds = np.hstack([roi_inds, roi_sample])
        #print('sampled fg rois = %i' % num_rois_to_sample)

    # sample background relations
    sample_rels = []
    sample_rels_inds = []
    for i in roi_inds:
        for j in roi_inds:
            if i != j and [i, j] not in rels_inds and [i, j] not in pos_inds:
                sample_rels.append([i,j,0])
                sample_rels_inds.append([i,j])
    #print('background rels= %i' % len(sample_rels))

    if len(sample_rels) > 0:
        # randomly sample negative edges to prevent no edges
        num_neg_rels = np.minimum(len(sample_rels), rels_per_image-len(rels_inds))
        #fprint('sampled background rels= %i' % num_neg_rels)
        inds = npr.choice(np.arange(len(sample_rels)), size=num_neg_rels, replace=False)
        rels += [sample_rels[i] for i in inds]
        rels_inds += [sample_rels_inds[i] for i in inds]

    # if still not enough rois, sample bg rois
    num_rois_to_sample = num_rois - len(roi_inds)
    if num_rois_to_sample > 0:
        bg_roi_inds = _sample_bg_rois(roidb, num_rois_to_sample)
        roi_inds = np.hstack([roi_inds, bg_roi_inds])

    roi_inds = np.array(roi_inds).astype(np.int64)
    # print('sampled rois = %i' % roi_inds.shape[0])
    return roi_inds.astype(np.int64), np.array(rels).astype(np.int64)
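The background sampler _sample_bg_rois called above is not shown here. A plausible sketch follows, assuming the roidb entry carries a 'max_overlaps' array and the usual Fast R-CNN background overlap window; the key name and threshold values are assumptions, not the original implementation.

import numpy as np
import numpy.random as npr

def _sample_bg_rois(roidb, num_bg_rois, bg_thresh_hi=0.5, bg_thresh_lo=0.0):
    # Pick rois whose best gt overlap falls in the background window [lo, hi).
    # 'max_overlaps' and the thresholds are illustrative assumptions about the roidb layout.
    overlaps = roidb['max_overlaps']
    bg_inds = np.where((overlaps < bg_thresh_hi) & (overlaps >= bg_thresh_lo))[0]
    if bg_inds.size == 0:
        return np.empty(0, dtype=np.int64)
    num_bg_rois = min(num_bg_rois, bg_inds.size)
    return npr.choice(bg_inds, size=num_bg_rois, replace=False).astype(np.int64)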