Example #1
def test_hierarchical_dev_expansion():
    """Test that hierarchical dev expansion is correct"""
    game = rsgame.empty([9, 16], [4, 3])
    mask = [True, False, True, False, False, True, False]
    profs = hr.expand_deviation_profiles(game, mask, [3, 4])
    actual = utils.axis_to_elem(profs)
    expected = utils.axis_to_elem([
        [6, 3, 0, 0, 0, 16, 0],
        [3, 3, 3, 0, 0, 16, 0],
        [0, 3, 6, 0, 0, 16, 0],
        [6, 0, 0, 3, 0, 16, 0],
        [3, 0, 3, 3, 0, 16, 0],
        [0, 0, 6, 3, 0, 16, 0],
        [9, 0, 0, 0, 4, 12, 0],
        [6, 0, 3, 0, 4, 12, 0],
        [3, 0, 6, 0, 4, 12, 0],
        [0, 0, 9, 0, 4, 12, 0],
        [9, 0, 0, 0, 0, 12, 4],
        [6, 0, 3, 0, 0, 12, 4],
        [3, 0, 6, 0, 0, 12, 4],
        [0, 0, 9, 0, 0, 12, 4],
    ])
    assert np.setxor1d(actual, expected).size == 0

    profs = hr.expand_deviation_profiles(game, mask, [3, 4], 0)
    actual = utils.axis_to_elem(profs)
    expected = utils.axis_to_elem([
        [6, 3, 0, 0, 0, 16, 0],
        [3, 3, 3, 0, 0, 16, 0],
        [0, 3, 6, 0, 0, 16, 0],
        [6, 0, 0, 3, 0, 16, 0],
        [3, 0, 3, 3, 0, 16, 0],
        [0, 0, 6, 3, 0, 16, 0],
    ])
    assert np.setxor1d(actual, expected).size == 0
Example #2
def test_rand_dpr_allow_incomplete(add_prob, num_obs, game_desc):
    """Test that allow_incomplete works for random games"""
    # Generate games
    players, strategies, red_players = game_desc
    base = rsgame.BaseGame(players, strategies)
    game = gamegen.add_profiles(base, add_prob)
    sgame = gamegen.add_noise(game, 1, num_obs)
    red = reduction.DeviationPreserving(strategies, players, red_players)

    # Try to reduce game
    red_game = red.reduce_game(game, True)
    red_sgame = red.reduce_game(sgame, True)

    # Verify that when allow_incomplete, then reduce returns all profiles
    reduced_full_profiles = utils.axis_to_elem(
        red.reduce_profiles(game.profiles))
    reduced_profiles = utils.axis_to_elem(red_game.profiles)
    assert np.setxor1d(reduced_profiles, reduced_full_profiles).size == 0
    reduced_sample_profiles = utils.axis_to_elem(red_sgame.profiles)
    assert np.setxor1d(reduced_sample_profiles,
                       reduced_full_profiles).size == 0

    redord = np.argsort(reduced_profiles)
    redsord = np.argsort(reduced_sample_profiles)
    assert np.all(np.isnan(red_game.payoffs[redord]) ==
                  np.isnan(red_sgame.payoffs[redsord])), \
        "sample game and game didn't have same nan payoffs"
    assert all(np.all(np.isnan(p).any(-1) == np.isnan(p).all(-1)) for p
               in red_sgame.sample_payoffs), \
        "some sample payoffs had partial nans"
Example #3
def test_twins_dev_expansion():
    """Test that dpr dev expansion is correct

    Note, this is the only one that has "new" code, so it's the most important
    to test."""
    game = rsgame.empty([9, 16], [4, 3])
    mask = [True, False, True, False, False, True, False]
    profs = tr.expand_deviation_profiles(game, mask)
    actual = utils.axis_to_elem(profs)
    expected = utils.axis_to_elem([
        [8, 1, 0, 0, 0, 16, 0],
        [0, 1, 8, 0, 0, 16, 0],
        [8, 0, 0, 1, 0, 16, 0],
        [0, 0, 8, 1, 0, 16, 0],
        [9, 0, 0, 0, 1, 15, 0],
        [5, 0, 4, 0, 1, 15, 0],
        [0, 0, 9, 0, 1, 15, 0],
        [9, 0, 0, 0, 0, 15, 1],
        [5, 0, 4, 0, 0, 15, 1],
        [0, 0, 9, 0, 0, 15, 1],
    ])
    assert np.setxor1d(actual, expected).size == 0

    profs = tr.expand_deviation_profiles(game, mask, role_index=0)
    actual = utils.axis_to_elem(profs)
    expected = utils.axis_to_elem([
        [8, 1, 0, 0, 0, 16, 0],
        [0, 1, 8, 0, 0, 16, 0],
        [8, 0, 0, 1, 0, 16, 0],
        [0, 0, 8, 1, 0, 16, 0],
    ])
    assert np.setxor1d(actual, expected).size == 0
Example #4
def test_identity_dev_expansion():
    """Test that identity dev expansion is correct"""
    game = rsgame.empty([3, 4], [4, 3])
    mask = [True, False, True, False, False, True, False]
    profs = ir.expand_deviation_profiles(game, mask)
    actual = utils.axis_to_elem(profs)
    expected = utils.axis_to_elem([
        [2, 1, 0, 0, 0, 4, 0],
        [1, 1, 1, 0, 0, 4, 0],
        [0, 1, 2, 0, 0, 4, 0],
        [2, 0, 0, 1, 0, 4, 0],
        [1, 0, 1, 1, 0, 4, 0],
        [0, 0, 2, 1, 0, 4, 0],
        [3, 0, 0, 0, 1, 3, 0],
        [2, 0, 1, 0, 1, 3, 0],
        [1, 0, 2, 0, 1, 3, 0],
        [0, 0, 3, 0, 1, 3, 0],
        [3, 0, 0, 0, 0, 3, 1],
        [2, 0, 1, 0, 0, 3, 1],
        [1, 0, 2, 0, 0, 3, 1],
        [0, 0, 3, 0, 0, 3, 1],
    ])
    assert np.setxor1d(actual, expected).size == 0

    profs = ir.expand_deviation_profiles(game, mask, role_index=0)
    actual = utils.axis_to_elem(profs)
    expected = utils.axis_to_elem([
        [2, 1, 0, 0, 0, 4, 0],
        [1, 1, 1, 0, 0, 4, 0],
        [0, 1, 2, 0, 0, 4, 0],
        [2, 0, 0, 1, 0, 4, 0],
        [1, 0, 1, 1, 0, 4, 0],
        [0, 0, 2, 1, 0, 4, 0],
    ])
    assert np.setxor1d(actual, expected).size == 0
Example #5
def test_dpr(keep_prob, game_desc):
    """Simple test that dpr functions are consistent"""
    players, strategies, red_players = game_desc
    # Create game and reduction
    game = gamegen.role_symmetric_game(players, strategies)
    game = gamegen.drop_profiles(game, keep_prob)
    sgame = gamegen.add_noise(game, 1, 3)
    red = reduction.DeviationPreserving(strategies, players, red_players)

    # Try to reduce game
    assert rsgame.basegame_copy(game) == red.full_game
    assert red.reduce_game(rsgame.basegame_copy(game)) == red.red_game
    red_game = red.reduce_game(game)
    red_game2 = reduction.reduce_game_dpr(game, red_players)
    red_sgame = red.reduce_game(sgame)

    # Assert that reduce_game_dpr produces identical results
    reduced_profiles = utils.axis_to_elem(red_game.profiles)
    reduced_profiles2 = utils.axis_to_elem(red_game2.profiles)
    assert np.setxor1d(reduced_profiles, reduced_profiles2).size == 0, \
        "different reduction functions didn't produce identical results"

    # Assert that reducing all profiles covers reduced game
    reduced_full_profiles = utils.axis_to_elem(
        red.reduce_profiles(game.profiles))
    assert np.setdiff1d(reduced_profiles, reduced_full_profiles).size == 0, \
        "reduced game contained profiles it shouldn't have"
    reduced_sample_profiles = utils.axis_to_elem(red_sgame.profiles)
    assert np.setdiff1d(reduced_sample_profiles,
                        reduced_full_profiles).size == 0, \
        "reduced sample game contained profiles it shouldn't have"
    assert np.setxor1d(reduced_sample_profiles,
                       reduced_profiles).size == 0, \
        "reduced sample game and reduced game had different profiles"

    # Assert that all contributing profiles are in the expansion of the reduced
    # game
    full_profiles = utils.axis_to_elem(game.profiles)
    full_reduced_profiles = utils.axis_to_elem(
        red.expand_profiles(red_game.profiles))
    assert np.setdiff1d(full_reduced_profiles, full_profiles).size == 0, \
        "full game did not have data for all profiles required of reduced"
    full_reduced_sample_profiles = utils.axis_to_elem(
        red.expand_profiles(red_sgame.profiles))
    assert np.setdiff1d(full_reduced_sample_profiles,
                        full_profiles).size == 0, \
        ("full sample game did not have data for all profiles required of "
         "reduced")
    assert np.setxor1d(full_reduced_profiles,
                       full_reduced_sample_profiles).size == 0, \
        "sample game didn't produce identical results"
Example #6
def removeTransits(time, flux, period, epoch, duration):
	halfDur = 0.5 * duration / 24.
	bad = np.where(time < epoch - period + halfDur)[0]
	for p in np.arange(epoch, time[-1] + period, period):
		bad = np.append(bad, np.where((p - halfDur < time) & (time < p + halfDur))[0])
	good = np.setxor1d(range(len(time)), bad)
	return time[good], flux[good]
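# --- Usage sketch, not part of the original example: assumes `time` is in days and
# `duration` in hours (hence the division by 24 in halfDur); arrays below are synthetic.
import numpy as np

time = np.arange(0.0, 30.0, 0.01)        # 30 days sampled every ~14 minutes
flux = np.ones_like(time)                # flat light curve
t_clean, f_clean = removeTransits(time, flux, period=3.0, epoch=1.0, duration=2.0)
print(len(time), len(t_clean))           # fewer samples remain once in-transit points are masked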
Example #7
    def get_obstList(self,X,Y,Z):
        """
         return a list of all indices of lattice points within the boundaries of the conical scour pit obstacle.  x_s is defined in 'Scour at marine structures' by Richard Whitehouse, 1998.  Assumes river sand with phi (angle of repose) equal to 30 degrees.  h_cone is equal to rad_cone*tan(30) = rad_cone*0.57735
        """
       
        x_c_cone = self.x_c
        z_c_cone = self.z_c
        y_c_cone = 0
        x_s = 2.25*2*self.cyl_rad
        rad_cone = x_s + self.cyl_rad
        h_cone = rad_cone*0.57735

        floor_part = np.array(np.where(Y < h_cone)).flatten()

        dist = (X - self.x_c)**2 + (Z - self.z_c)**2;
        cyl_part = list(np.array(np.where( dist < self.cyl_rad**2)).flatten())

        scour_pit = np.array(np.where( (X - x_c_cone)**2 + (Z - z_c_cone)**2 <= (rad_cone/h_cone)**2*(Y - y_c_cone)**2))

        # remove the scour pit from the floor
        obst_list = np.setxor1d(floor_part[:], 
                        np.intersect1d(floor_part[:],scour_pit[:]))


        # then add the cylinder
        obst_list = np.union1d(obst_list[:],cyl_part[:])
        
        return list(obst_list[:])
Example #8
    def _arithmetical_operation(self, x, operator, in_place=False):
        operator, ioperator = {
            '+': (add, iadd),
            '-': (sub, isub),
            '*': (mul, imul),
            '/': (div, idiv),
            '**': (pow, ipow)
        }[operator]

        if in_place:
            if isinstance(x, self.__class__):
                with ndarray_write(self._domain), ndarray_write(self._range):
                    self[self._domain] = operator(self._range, x[self._domain])

                    exclusive_or = np.setxor1d(self._domain, x.domain)
                    self[exclusive_or] = np.full(exclusive_or.shape, np.nan)
            else:
                with ndarray_write(self._range):
                    self.range = ioperator(self.range, x)

            return self
        else:
            copy = ioperator(self.copy(), x)

            return copy
Example #9
def sets():
    global a, b, c, z, r

    print '###########################'
    print '#'
    print '#   Set operations'
    print '#'
    print '###########################'
    # unique elements
    print 'unique elements:', np.unique([0, 0, 1, 1, 5, 6, 7, 7, 2, 11], return_index=True, return_inverse=True)

    # test whether each element of d1 is also present in d2; returns a boolean array of the same length as d1
    d1 = np.array([0, 1, 2, 5, 0, 5])
    d2 = [0, 2, 5]
    print 'membership:', np.in1d(d1, d2)

    # intersection
    print 'intersection:', np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])

    # difference
    d1 = np.array([1, 2, 3, 2, 4, 1])
    d2 = np.array([3, 4, 5, 6])
    print 'set difference:', np.setdiff1d(d1, d2)

    # symmetric difference (XOR)
    d1 = np.array([1, 2, 3, 2, 4])
    d2 = np.array([2, 3, 5, 7, 5])
    print 'symmetric difference:', np.setxor1d(d1, d2)

    # union
    print 'union:', np.union1d([-1, 0, 1], [-2, 0, 2])
Example #10
def sampleNo(xvar, yvar, N, avoididx):
    """Sample from pixels in mesh, excluding yes pixels and already sampled no pixels.

    :param xvar:
      Numpy array of centers of all columns in mesh.
    :param yvar:
      Numpy array of centers of all rows in mesh.
    :param N:
      Number of no pixels to sample.
    :param avoididx:
      1D array of indices from mesh that should NOT be sampled from.  Initially this will be the array
      of indices where the yes pixels are.
    :returns:
      Randomly chosen list of tuples of (x,y) coordinate points that are outside polygons.

    """

    allidx = np.arange(0, len(xvar)*len(yvar))  # flattened array of all indices in mesh
    noidx = np.setxor1d(allidx, avoididx) #allidx - avoididx
    #noidx = np.array(list(set(allidx) - set(avoididx)))
    nosampleidx = np.random.choice(noidx, size=N,replace=False)
    newavoididx = np.sort(np.hstack((avoididx, nosampleidx)))
    rowidx,colidx = np.unravel_index(nosampleidx, (len(yvar), len(xvar)))
    samples = []
    for row,col in zip(rowidx, colidx):
        xp = xvar[col]
        yp = yvar[row]
        samples.append((xp, yp))

    return (samples, newavoididx)
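# --- Usage sketch, not part of the original example: made-up mesh vectors, just to show
# how the avoided-index bookkeeping grows with each call.
import numpy as np

xvar = np.linspace(0.0, 1.0, 5)        # 5 column centers
yvar = np.linspace(0.0, 1.0, 4)        # 4 row centers -> 20 mesh cells total
avoididx = np.array([0, 1, 2])         # e.g. indices of "yes" pixels

samples, avoididx = sampleNo(xvar, yvar, N=3, avoididx=avoididx)
print(samples)        # three (x, y) points drawn outside the avoided cells
print(avoididx)       # original avoided indices plus the three newly sampled ones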
Example #11
def remove_from_file(data, filename):
    """trim out rows that match genes found in file <filename>
    First row is ignored
    if multiple entries per line, seperated by comma, take the first"""

    #load gene names from file
    ff = open(filename);
    xx = ff.readline();  #discard first line
    
    hk_genes = list();
    for line in ff:
      entries = line.split(',');
      hk_genes.append(entries[0].strip().lower());
    
    ff.close();
    
    #match hk genes to gene indices
    missing = 0;
    hk_indices = list();
    lower_row_labels = [gene.lower() for gene in data.row_labels];
    for hk_gene in hk_genes:
      try:
        ii = lower_row_labels.index(hk_gene);
        hk_indices.append(ii);
      except ValueError:
        missing+=1;
    
    #remove rows that match
    all_indices = np.arange(data.shape[0]);
    if(len(hk_indices) != 0):    
        keep_indices = np.setxor1d(all_indices,hk_indices); 
    else:
        keep_indices = all_indices;
    
    return data.subset_genes(keep_indices);
Example #12
def main():
    rec = args.rec
    r = mdtraj.load(rec)
    lig = args.lig
    l = mdtraj.load(lig)
    d = args.dist
    temp = args.temp
    pref = rec.split('.')[0]

    # can't figure out a non-hacky way to combine pdbs.
    cp_receptor = 'head -n -2 ' + rec
    receptor, r_err = call_cl(cp_receptor)
    cp_ligand = 'tail -n +2 ' + lig
    ligand, l_err = call_cl(cp_ligand)
    tf = open(temp, 'w')
    tf.write(receptor)
    tf.write(ligand)
    tf.close()

    # get indices of receptor within distance d of ligand
    comb = mdtraj.load(temp)
    # get ligand indices
    li = comb.topology.select('not protein')
    # find neighbors
    neighbors = mdtraj.compute_neighbors(comb, d, li)[0]
    # remove ligand from neighbor list (symmetric diff)
    n = np.setxor1d(li, neighbors)
    # easier to reset sometimes
    comb = mdtraj.load(temp)
    comb.restrict_atoms(n)
    comb.save_pdb(pref + '_trim.pdb')
    IPython.embed()
Example #13
    def get_obstList(self,X,Y,Z):
        """
         return a list of all indices of lattice points within the boundaries of the
         scour pit obstacle

        """
       
        ellip_a = 2.*2.*self.cyl_rad
        ellip_b = 2.*self.cyl_rad
        ellip_c = 8.*self.cyl_rad
        ellip_x = self.x_c
        ellip_z = self.z_c + self.cyl_rad
        ellip_y = ellip_b 

        floor_part = np.array(np.where(Y < ellip_b)).flatten()

        dist = (X - self.x_c)**2 + (Z - self.z_c)**2;
        cyl_part = list(np.array(np.where( dist < self.cyl_rad**2)).flatten())

        scour_pit = np.array(np.where( (X - ellip_x)**2/(ellip_a**2) + 
                        (Y - ellip_y)**2/(ellip_b**2) +
                        (Z - ellip_z)**2/(ellip_c**2) <= 1.)).flatten()

        # remove the scour pit from the floor
        obst_list = np.setxor1d(floor_part[:], 
                        np.intersect1d(floor_part[:],scour_pit[:]))


        # then add the cylinder
        obst_list = np.union1d(obst_list[:],cyl_part[:])
        
        return list(obst_list[:])
Example #14
    def __setSelectionRect(self, rect, action):
        # Set the current mouse drag selection rectangle
        if not rect.isValid():
            rect = rect.adjusted(-0.01, -0.01, 0.01, 0.01)

        rect = rect.intersected(self.contentsRect())

        indices = self.__selectionIndices(rect)

        if action & SelectAction.Clear:
            selection = []
        elif self.__selstate is not None:
            # Mouse drag selection is in progress. Update only the current
            # selection
            selection = self.__selstate.selection
        else:
            selection = self.__selection

        if action & SelectAction.Toogle:
            selection = np.setxor1d(selection, indices)
        elif action & SelectAction.Deselect:
            selection = np.setdiff1d(selection, indices)
        elif action & SelectAction.Select:
            selection = np.union1d(selection, indices)

        self.setSelection(selection)
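# --- Standalone sketch, not part of the original widget code: the three branches above
# reduce to plain numpy set operations on index arrays (SelectAction and Qt not needed).
import numpy as np

selection = np.array([1, 2, 3])
indices = np.array([3, 4])

print(np.setxor1d(selection, indices))   # toggle   -> [1 2 4]
print(np.setdiff1d(selection, indices))  # deselect -> [1 2]
print(np.union1d(selection, indices))    # select   -> [1 2 3 4]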
Example #15
def remove_threes_and_fours(X, Y):
    """ Y: array-like, shape (n_examples,) """
    three_idxs = np.where(Y == 3)
    four_idxs = np.where(Y == 4)
    ia = np.indices(Y.shape)
    remaining_idxs = np.setxor1d(ia, np.concatenate((three_idxs[0], four_idxs[0])))
    return X[remaining_idxs], Y[remaining_idxs]
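# --- Quick check with toy arrays (illustrative values only, not from the original source).
import numpy as np

X = np.arange(10).reshape(5, 2)          # five 2-feature examples
Y = np.array([1, 3, 4, 2, 3])            # labels; rows 1, 2 and 4 should be dropped

X_kept, Y_kept = remove_threes_and_fours(X, Y)
print(Y_kept)     # [1 2]
print(X_kept)     # rows 0 and 3 of X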
Example #16
    def get_obstList(self,X,Y,Z):
        """
         return a list of all indices of lattice points within the boundaries of the conical scour pit obstacle

        """
       
        x_c_cone = self.x_c
        z_c_cone = self.z_c
        y_c_cone = 0
        x_s = 2.25*2*self.cyl_rad
        rad_cone = x_s + self.cyl_rad
        h_cone = rad_cone*0.57735

        floor_part = np.array(np.where(Y < h_cone)).flatten()

        dist = (X - self.x_c)**2 + (Z - self.z_c)**2
        cyl_part = list(np.array(np.where( dist < self.cyl_rad**2)).flatten())

        scour_pit = np.array(np.where( (X - x_c_cone)**2 + (Z - z_c_cone)**2 <= (rad_cone/h_cone)**2*(Y - y_c_cone)**2))

        # remove the scour pit from the floor
        obst_list = np.setxor1d(floor_part[:], 
                        np.intersect1d(floor_part[:],scour_pit[:]))


        # then add the cylinder
        obst_list = np.union1d(obst_list[:],cyl_part[:])
        
        return list(obst_list[:])
Example #17
def PrepForTestTrain(dataSet, iterCnt, dirLoc, varNames, varIndxs) :
    dataLen = dataSet.shape[0]
    trainLen = int(numpy.floor(dataLen * 0.6666))

    for iterIndex in range(iterCnt) :
        print "%d of %d iterations" % (iterIndex + 1, iterCnt)

        # Save a random sample of the data for training, and the rest for testing
        trainSelected = random.sample(range(dataLen), trainLen)
        testSelected = numpy.setxor1d(trainSelected, range(dataLen))

        for subProj in varIndxs.keys() :
            arffHeader = dirLoc + '/' + subProj + '/arffHeader.txt'
            MakeARFFHeader(varNames[varIndxs[subProj]], arffHeader)
            trainStem = "%s/%s/trainingData_%dof%d" % (dirLoc, subProj, iterIndex + 1, iterCnt)
            testStem = "%s/%s/testingData_%dof%d" % (dirLoc, subProj, iterIndex + 1, iterCnt)

            numpy.savetxt(trainStem + '.csv', dataSet[trainSelected, varIndxs[subProj]], fmt="%6.4f", delimiter=',')
            os.system('cat %s %s > %s' % (arffHeader, trainStem + '.csv', trainStem + '.arff'))

            numpy.savetxt(testStem + '.csv', dataSet[testSelected, varIndxs[subProj]], fmt="%6.4f", delimiter=',')
            os.system('cat %s %s > %s' % (arffHeader, testStem + '.csv', testStem + '.arff'))

    return(fileNames)
Example #18
def csi_compute(param):
    """worker function"""
    get_var_from_file(os.path.join(GRATOOLS_CONFIG, 'Csi_config.py'))
    th_bins = data.TH_BINNING
    i, veci, dI, R, nside = param
    if i%10000 == 0:
        print i
    dIi = dI[i]
    Ri = R[i]
    dIij_list = [[] for l in range(0, len(th_bins)-1)]
    counts_list = [[] for l in range(0, len(th_bins)-1)]
    Rij_list = [[] for l in range(0, len(th_bins)-1)]
    for th, (thmin, thmax) in enumerate(zip(th_bins[:-1], th_bins[1:])):
        pixintorad_min = hp.query_disc(nside, veci, thmin)
        pixintorad_max = hp.query_disc(nside, veci, thmax)
        pixintoring = np.setxor1d(pixintorad_max, pixintorad_min)
        Rj = R[pixintoring]
        Rj = Rj[Rj > hp.UNSEEN]
        dIj = dI[pixintoring]
        dIj = dIj[dIj > hp.UNSEEN]
        dIij = np.sum(dIi*dIj)#-Imean**2)
        Rij = np.sum(Ri*Rj)
        counts = len(dIj)
        dIij_list[th].append(dIij)
        counts_list[th].append(counts)
        Rij_list[th].append(Rij)
    return dIij_list, counts_list, Rij_list
Example #19
def test_sample_game_payoff():
    profiles = [
        [0, 4, 0, 9],
        [0, 4, 1, 8],
        [0, 4, 4, 5],
        [0, 4, 3, 6],
    ]
    payoffs = [
        [
            [[0] * 4, [1, 2, 3, 4], [0] * 4, [5, 6, 7, 8]],
        ],
        [
            [[0, 0], [0, 0], [9, 10], [0, 0]],
        ],
        [
            [[0] * 3, [0] * 3, [0] * 3, [11, 12, 13]],
        ],
        [
            [[0] * 5, [14, 15, 16, 17, 18], [0] * 5, [0] * 5],
        ],
    ]
    game = rsgame.SampleGame([4, 9], 2, profiles, payoffs)
    red = reduction.DeviationPreserving([2, 2], [4, 9], [2, 3])
    red_game = red.reduce_game(game)

    prof_map = dict(zip(
        map(utils.hash_array, red_game.profiles),
        itertools.chain.from_iterable(red_game.sample_payoffs)))

    payoffs = prof_map[utils.hash_array([0, 2, 0, 3])]
    actual = payoffs[1]
    expected = [1, 2, 3, 4]
    assert np.setxor1d(actual, expected).size == 0
    actual = payoffs[3]
    expected = [5, 6, 7, 8]
    assert np.setxor1d(actual, expected).size == 0

    payoffs = prof_map[utils.hash_array([0, 2, 1, 2])]
    actual = payoffs[1]
    expected = [14, 15, 16, 17, 18]
    assert np.setxor1d(actual, expected).size == 3
    actual = payoffs[2]
    expected = [9, 10]
    assert np.setxor1d(actual, expected).size == 0
    actual = payoffs[3]
    expected = [11, 12, 13]
    assert np.setxor1d(actual, expected).size == 1
Example #20
    def get_obstList(self,X,Y,Z):

        #x = np.array(X)
        y = np.array(Y)
        z = np.array(Z)
        obst_l = np.where(z < self.L)
        obst_h = np.where(z > 0.2)
        obst = np.intersect1d(obst_l[:],obst_h[:])
        y_dist1 = np.abs(y - (self.y1+self.a/2.))
        ch1 = np.where(y_dist1<self.a/2.)
        ch1 = np.intersect1d(obst[:],ch1[:])
        obst = np.setxor1d(obst[:],ch1[:])
        y_dist2 = np.abs(y - (self.y1+self.a/2.+self.S))
        ch2 = np.where(y_dist2<self.a/2.)
        ch2 = np.intersect1d(obst[:],ch2[:])
        obst = np.setxor1d(obst[:],ch2[:])
        return obst[:]
Example #21
def divide_and_limit(numerators, denominators, zero_limits):

    # make sure we have compatible arrays for the following computations
    if denominators.shape != numerators.shape:
        message = "numerator and denominator arrays do not share the same shape"
        logger.error(message)
        raise ValueError(message)

    # get the original shape so that later we can reshape the arrays
    # that we'll flatten back into their original shape
    original_shape = denominators.shape
    denominators = denominators.flatten()
    numerators = numerators.flatten()

    # create an array of values corresponding to the shape of the input arrays
    results = np.full(denominators.shape, np.nan)

    # get a column vector of indices where denominator is not zero,
    # so as to avoid divide by zero in the following calculation
    not_zeros = np.where(denominators != 0)
    if len(not_zeros[0]) > 0:
        results[not_zeros] = numerators[not_zeros] / denominators[not_zeros]

    # get array of index values corresponding to the denominators array,
    # for example if the array is 4 elements long then we get an indices array: [0, 1, 2, 3]
    index_values = np.arange(len(denominators))

    # perform an XOR on the array indices and the 'not zeros' array of indices
    # to get the indices of the array where the value is zero
    zeros = np.setxor1d(index_values, not_zeros)
    if zeros.size > 0:

        # we have a zero denominator sum value so we can't perform the normal calculation at these points,
        # so we limit the value to the zero limits

        if len(zero_limits) == 1:

            results[zeros] = zero_limits[0]

        elif len(zero_limits) == 2:

            # find indices where the value is zero, set the value at these indices to the first zero limit
            limits = np.where(numerators[zeros] == 0)
            if limits[0].size > 0:
                results[zeros[limits]] = zero_limits[0]

            # find indices where the value is not zero, set the value at these indices to the second zero limit
            limits = np.where(numerators[zeros] != 0)
            if limits[0].size > 0:
                results[zeros[limits]] = zero_limits[1]

        else:
            message = "Invalid zero limits argument, must contain 1 or 2 values"
            logger.error(message)
            raise ValueError(message)

    # reshape the results back to our original shape and return
    return np.reshape(results, original_shape)
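# --- Small usage sketch, not part of the original source, showing the zero-limit behaviour
# with toy arrays (the `logger` referenced above is assumed to be a module-level logging logger;
# it is only touched on the error paths, which this sketch never hits).
import numpy as np

numerators = np.array([[2.0, 0.0], [3.0, 5.0]])
denominators = np.array([[4.0, 0.0], [0.0, 5.0]])

# one zero limit: every zero-denominator cell gets that value
print(divide_and_limit(numerators, denominators, [0.0]))       # -> [[0.5, 0.], [0., 1.]]

# two zero limits: first is used where the numerator is also zero, second otherwise
print(divide_and_limit(numerators, denominators, [0.0, 1.0]))  # -> [[0.5, 0.], [1., 1.]]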
Example #22
def split_samples(data, subsample_size):
    np.random.seed(RANDOM_SEED); #Set seed so outputs are repeatable
    sub_ii = np.random.choice(data.shape[1], subsample_size, replace=False);
    holdouts_ii = np.setxor1d(np.arange(data.shape[1]), sub_ii);

    subset = data.subset_samples(sub_ii);
    holdouts = data.subset_samples(holdouts_ii);

    return holdouts, subset;
Example #23
def find_N(O, j, E_c, E_s):
    N_cur = np.unique(E_s[j])
    N_prev = N_cur.copy()
    N_cur = expand(N_prev, E_c, E_s)
    while np.setxor1d(N_cur, N_prev, assume_unique=True).size > 0 and \
            not np.setdiff1d(O, N_cur, assume_unique=True).size == 0:
        N_cur, N_prev = expand(N_cur, E_c, E_s), N_cur
    res = np.setdiff1d(O, N_prev)
    return res
Example #24
    def partition(self, indices):
        '''Partition a Group according to a set of indices, possibly given some logical criterion
           (e.g. from np.where), returning two new objects: the subset selected by the indices and
           its complement.
           indices := partition according to these'''

        C = copy.deepcopy(self)
        D = copy.deepcopy(self)
        D.atoms = D.atoms[indices]
        C.atoms = C.atoms[np.setxor1d(np.array(indices), np.arange(len(C.atoms)))]

        return D, C
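# --- Minimal sketch, not from the original source: the complement trick used above works on
# bare index arrays too, without the deep-copied Group objects.
import numpy as np

atoms = np.arange(10)                 # stand-in for C.atoms
indices = np.array([2, 5, 7])         # e.g. from np.where(...)

selected = atoms[indices]
complement = atoms[np.setxor1d(indices, np.arange(len(atoms)))]
print(selected)      # [2 5 7]
print(complement)    # the remaining seven entries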
Example #25
def Jmattofolds(Jmat, kfolds, type='ifg'):
    '''Creates lists of length kfolds. Every entry of each list is an array of row indices into the original Jmat.

    Args:

        * Jmat      -> connectivity matrix (Nifg x Nsar)
        * kfolds    -> Number of folds
        * type      -> Can be 'sar', 'ifg'

    Returns:

        * trainlist   -> List of row indices for training set
        * testlist    -> List of row indices for testing set'''

    Nifg, Nsar = Jmat.shape
    flag = True

    while flag:
        trainlist = []
        testlist = []
        if type == 'sar':
            pos = np.random.permutation(Nsar)
        elif type == 'ifg':
            pos = np.random.permutation(Nifg)

        breakpos = pos.size/kfolds
        inds = np.arange(Nifg, dtype=np.int)

        if type == 'sar':
            trnbreak = np.arange(0,Nsar,breakpos).astype(np.int)
            if trnbreak[-1] != (Nsar-1):
                trnbreak = np.append(trnbreak,Nsar-1)

        elif type == 'ifg':
            trnbreak = np.arange(0,Nifg,breakpos).astype(np.int)
            if trnbreak[-1] != (Nifg-1):
                trnbreak = np.append(trnbreak,Nifg-1)

        for ii in range(kfolds):
            itrain = pos[trnbreak[ii]:trnbreak[ii+1]]

            if type=='sar':
                temp = np.abs(Jmat[:,itrain]).sum(axis=1)
                itrain = np.flatnonzero(temp == 2)
                
            itest  = np.setxor1d(itrain, inds)
            trainlist.append(itrain)
            testlist.append(itest)

        for ii in range(kfolds):
            flag = flag & (len(trainlist[ii]) > 2)

        flag = not flag

    return trainlist, testlist
Example #26
def read_html(filename, person):
	file_data = urllib.urlopen(filename).read()
	soup = BeautifulSoup(file_data, 'html.parser')
	date_to_msgs = {}
	relationships = {}
	for thread in soup.find_all('div', class_='thread'):
		# check if thread is b/w 2 people
		thread_iter = thread.childGenerator()
		people_involved = thread_iter.next()
		people_involved = people_involved.split(',')
		
		# get all the messages		
		msgs = thread.find_all('p')
		msgs = [m.get_text().encode('ascii', 'ignore') for m in msgs]

		# holds the name and timestamp information
		msg_meta = thread.find_all('div', class_='message')
		# filter to only messages by our person of interest
		total_indices = np.arange(len(msg_meta))
		meta_filtered_idx = [idx for (idx,p) in enumerate(msg_meta) 
			if (person in p.span.get_text().encode('ascii','ignore'))]
		msgs_filtered = list(array(msgs)[meta_filtered_idx])
		
		
		# also the timestamps mapping to the msgs
		just_timestamps = [str(ts.find('span', class_='meta').get_text()) 
			for ts in msg_meta if ts.find('span', class_='meta')]
		just_timestamps = [parse(t) for t in just_timestamps]
		timestamps_filtered = list(array(just_timestamps)[meta_filtered_idx])
		for idx,ts in enumerate(timestamps_filtered):
			if ts not in date_to_msgs:
				date_to_msgs[ts] = [msgs_filtered[idx]]
			else:
				date_to_msgs[ts].append(msgs_filtered[idx])

		if len(people_involved) == 2:
			other = people_involved[0] if person not in people_involved[0] else people_involved[1]
			# get the other person's info
			not_indices = np.setxor1d(total_indices, meta_filtered_idx)
			not_indices = [int(i) for i in not_indices]
			# sometimes the xor returns floats
			if (len(not_indices) > 0 and isinstance(not_indices[0], int)):
				other_filtered = list(array(msgs)[not_indices])
				other_timestamps = list(array(just_timestamps)[not_indices])
				relationships[other] = ([[timestamps_filtered],[msgs_filtered]], 
								[[other_timestamps],[other_filtered]]) # -person-, -other-
			
				ts_grouped = [list(g) for k, g in itertools.groupby(timestamps_filtered, key=lambda d: d.date())]
				other_ts_grouped = [list(g) for k, g in itertools.groupby(other_timestamps, key=lambda d: d.date())]

				[senti, msgs_grouped] = group_msgs(msgs_filtered, ts_grouped)
				[other_senti, other_msgs_grouped] = group_msgs(other_filtered, other_ts_grouped)
				print senti
				print other_senti
Example #27
    def get_obstList(self,X,Y,Z):
        """ return a list of indices within the boundary of the channel floor
        """
        #x = np.array(X)
        y = np.array(Y); z = np.array(Z)
        cav1 = np.where(z >= self.z_start)
        cav2 = np.where(z <= self.z_end)
        ol = np.setxor1d(cav1[:],cav2[:])
        cav3 = np.where(y <= self.depth)
        ol = np.intersect1d(ol[:],cav3[:])
        return ol[:]
Example #28
def nd_diffxor(a, b, uni=False):
    """using setxor... it is slower than nd_diff, 36 microseconds vs 18.2
    but this is faster for large sets
    """
    a_view = _view_as_(a)
    b_view = _view_as_(b)
    good = _check_dtype_(a_view, b_view)  # check dtypes
    if not good:
        return None
    ab = np.setxor1d(a_view, b_view, assume_unique=uni)
    return ab.view(a.dtype).reshape(ab.shape[0], -1).squeeze()
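# --- The _view_as_ and _check_dtype_ helpers are not shown above. A minimal sketch of the
# row-view idiom this function relies on, assuming the helper collapses each row into a single
# void element so that np.setxor1d compares whole rows; _rows_as_void is a hypothetical stand-in.
import numpy as np

def _rows_as_void(arr):
    """Hypothetical stand-in for _view_as_: one void element per row."""
    arr = np.ascontiguousarray(arr)
    return arr.view(np.dtype((np.void, arr.dtype.itemsize * arr.shape[1]))).ravel()

a = np.array([[1, 2], [3, 4], [5, 6]])
b = np.array([[3, 4], [7, 8]])

ab = np.setxor1d(_rows_as_void(a), _rows_as_void(b))
print(ab.view(a.dtype).reshape(ab.shape[0], -1))   # rows present in exactly one of a, b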
Example #29
    def calc_indices(self, orig_shape):
        indices = self.parameters['indices']
        if isinstance(indices, list):
            self.indices = np.array(indices)
        else:
            indices_list = indices.split(':')
            indices_list = [int(i) for i in indices_list]
            if len(indices_list) == 2:
                indices_list.append(1)
            self.indices = np.arange(*indices_list)
        return np.setxor1d(np.arange(orig_shape), self.indices)
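# --- For reference, not part of the plugin: the complement returned for a hypothetical
# 12-frame dataset and an indices string of '2:8:2'.
import numpy as np

orig_shape = 12
indices = np.arange(*[int(i) for i in '2:8:2'.split(':')])   # -> [2 4 6]
print(np.setxor1d(np.arange(orig_shape), indices))           # every frame except 2, 4 and 6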
Example #30
def test_elem_axis():
    x = np.array([[5.4, 2.2],
                  [5.7, 2.8],
                  [9.6, 1.2]], float)
    assert np.all(x == utils.elem_to_axis(utils.axis_to_elem(x), float))
    assert np.all(x.astype(int) ==
                  utils.elem_to_axis(utils.axis_to_elem(x.astype(int)), int))
    assert utils.unique_axis(x).shape == (3, 2)
    array, counts = utils.unique_axis(x.astype(int), return_counts=True)
    assert array.shape == (2, 2)
    assert not np.setxor1d(counts, [2, 1]).size
Example #31
def main():
    import torch
    from torch.optim import lr_scheduler
    import torch.optim as optim
    from torch.autograd import Variable
    from trainer import fit
    import numpy as np
    cuda = torch.cuda.is_available()
    # Training settings

    parser = argparse.ArgumentParser(
        description='cross subject domain adaptation')

    parser.add_argument('--batch-size',
                        type=int,
                        default=100,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=100,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')

    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=True,
                        help='For Saving the current Model')

    # Writer will output to ./runs/ directory by default

    fold_idx = 4
    gamma = 0.7
    margin = 1.0

    DAsetting = False
    args = parser.parse_args()
    args.seed = 0
    args.use_tensorboard = True
    args.save_model = True
    n_epochs = 200
    startepoch = 0

    folder_name = 'exp11_0630'
    comment = 'deep4' + str(fold_idx) + '_g_' + str(gamma) + '_m_' + str(
        margin)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    device = torch.device("cuda" if use_cuda else "cpu")
    #kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    from datetime import datetime
    import os
    loging = False

    x_data, y_data = load_smt()
    x_data = x_data[:, :, :, 100:]
    #get subject number
    y_subj = np.zeros([108, 200])
    for i in range(108):
        y_subj[i, :] = i * 2
    y_subj = y_data.reshape(108, 200) + y_subj
    y_subj = y_subj.reshape(21600)
    #y_subj = np.concatenate([y_data,y_subj],axis=1)

    # plt.imshow(x_data[100,0,:,:])
    # For classification data
    valtype = 'subj'
    # if x_data.shape[2] != 60:
    #     x_data = x_data[:,:,2:,:]
    # plt.imshow(x_data[1000,0,:,:])
    # #subj - 0-27 train
    # train_subj1 = np.r_[0:27]
    # train_subj2 = np.r_[0:27]+54
    #
    # test_subj = np.r_[27:54,54+27:108]

    #chidx = np.r_[7:11, 12:15, 17:21, 32:41] #오연조건
    # chidx = np.r_[2:56, 60:62]
    # x_data = x_data[:,:,chidx,:]

    # For Domain adaptation setting
    if DAsetting:
        # test_subj = np.r_[fold_idx * 9:fold_idx * 9 + 9, fold_idx * 9 + 54:fold_idx * 9 + 9 + 54]
        test_subj_id = 39
        test_subj = np.r_[test_subj_id:test_subj_id + 1]
        train_subj1 = np.setxor1d(np.r_[0:108], test_subj)
        train_subj2 = test_subj

        n_targets = 60
        trial_s = (0, 200)
        trial_t = (0, n_targets)

        trial_val = (n_targets, 200)

        # dataset_train1 = GigaDataset(x=x_data, y=y_data, valtype=valtype, istrain=True,subj=train_subj1,trial=trial_s)
        dataset_train = GigaDataset(x=x_data,
                                    y=y_data,
                                    valtype=valtype,
                                    istrain=True,
                                    subj=train_subj2,
                                    trial=trial_t)
        # dataset_train = dataset_train1.__add__(dataset_train2)
        dataset_test = GigaDataset(x=x_data,
                                   y=y_data,
                                   valtype=valtype,
                                   istrain=False,
                                   subj=test_subj,
                                   trial=trial_val)

        triplet_dataset_train = TripletGigaDA(x=x_data,
                                              y=y_subj,
                                              valtype=valtype,
                                              istrain=True,
                                              subj_s=train_subj1,
                                              trial_s=trial_s,
                                              subj_t=train_subj2,
                                              trial_t=trial_t)

        # triplet_dataset_train2 = TripletGiga2(x=x_data, y=y_subj, valtype=valtype, istrain=True, subj=train_subj2, trial=trial_t)
        # triplet_dataset_train = triplet_dataset_train1.__add__(triplet_dataset_train2)

        triplet_dataset_test = TripletGigaDA(x=x_data,
                                             y=y_subj,
                                             valtype=valtype,
                                             istrain=True,
                                             subj_s=train_subj1,
                                             trial_s=trial_s,
                                             subj_t=test_subj,
                                             trial_t=trial_val)

    else:  #DG setting
        # test_subj = np.r_[fold_idx*9:fold_idx*9+9,fold_idx*9+54:fold_idx*9+9+54]
        # train_subj = test_subj
        # trial_train = (0, 30)
        # trial_val = (30, 200)
        #
        # bci_excellent = np.r_[43, 20, 27, 1, 28, 32, 35, 44, 36, 2]
        # bci_excellent = np.concatenate([bci_excellent, bci_excellent + 54])

        test_subj = np.r_[fold_idx * 9:fold_idx * 9 + 9,
                          fold_idx * 9 + 54:fold_idx * 9 + 9 + 54]
        # train_subj = np.setdiff1d(bci_excellent, test_subj)
        # bci_excellent.sort()

        print('test subj:' + str(test_subj))
        train_subj = np.setdiff1d(np.r_[0:108], test_subj)

        trial_train = (0, 200)
        trial_val = (0, 200)

        dataset_train = GigaDataset(x=x_data,
                                    y=y_data,
                                    valtype=valtype,
                                    istrain=True,
                                    subj=train_subj,
                                    trial=trial_train)
        dataset_test = GigaDataset(x=x_data,
                                   y=y_data,
                                   valtype=valtype,
                                   istrain=False,
                                   subj=test_subj,
                                   trial=trial_val)

        triplet_dataset_train = TripletGiga2(x=x_data,
                                             y=y_subj,
                                             valtype=valtype,
                                             istrain=True,
                                             subj=train_subj,
                                             trial=trial_train)
        # triplet_dataset_train2 = TripletGiga2(x=x_data[:,:,:,10:], y=y_subj, valtype=valtype, istrain=True, subj=train_subj,
        #                                      trial=trial_train)
        # triplet_dataset_train = triplet_dataset_train1.__add__(triplet_dataset_train2)

        triplet_dataset_test = TripletGiga2(x=x_data,
                                            y=y_subj,
                                            valtype=valtype,
                                            istrain=False,
                                            subj=test_subj,
                                            trial=trial_val)

    train_loader = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=args.batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset_test,
                                              batch_size=args.batch_size,
                                              shuffle=False)
    triplet_train_loader = torch.utils.data.DataLoader(
        triplet_dataset_train, batch_size=args.batch_size, shuffle=True)
    triplet_test_loader = torch.utils.data.DataLoader(
        triplet_dataset_test, batch_size=args.batch_size, shuffle=False)

    ###################################################################################################################
    # make model for metric learning
    from networks import DWConvNet, basenet, Deep4Net_origin, Deep4Net, Deep4NetWs, EmbeddingDeep4CNN, EmbeddingDeep4CNN_bn, TripletNet, FineShallowCNN, EmbeddingDeepCNN, QuintupletNet, EmbeddingShallowCNN
    from losses import TripletLoss_dev2, TripLoss, ContrastiveLoss_dk

    if gamma == 1.0:
        model = Deep4Net_origin()
    else:
        embedding_net = Deep4Net()
        print(embedding_net)
        model = TripletNet(embedding_net)
    #exp3-1: add one more fc layer
    # model.fc = nn.Sequential(
    #     nn.Linear(model.num_hidden,128),
    #     nn.ReLU(),
    #     nn.Dropout(),
    #     nn.Linear(128,2)
    # )
    if cuda:
        model.cuda()
    loss_fn = TripletLoss_dev2(margin, gamma).cuda()

    log_interval = 10

    ##########################################################
    # optimizer = optim.Adam(model.parameters())
    optimizer = optim.SGD(model.parameters(),
                          lr=0.1,
                          momentum=0.9,
                          weight_decay=0.0005)
    milestones = [15, 30, 50, 120]
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=milestones,
                                         gamma=0.1)  # dropping the LR too quickly seems to underfit

    # exp1 : 62ch, settings for folds 0-5
    # optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    # scheduler = lr_scheduler.StepLR(optimizer, 5, gamma=0.5, last_epoch=-1)

    #exp2 : 20 channels around the motor area; training failed to fit for both g=0.7 and g=1.0, so experiment with a stronger learning rate than before
    # optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    # scheduler = lr_scheduler.StepLR(optimizer, 5, gamma=1.0, last_epoch=-1)
    # #
    # #exp4, exp5
    # optimizer = optim.SGD(model.parameters(), lr=0.005/gamma, momentum=0.9)
    # scheduler = lr_scheduler.StepLR(optimizer, 10, gamma=0.8, last_epoch=-1) # dropping the LR too quickly seems to underfit

    # optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    # scheduler = lr_scheduler.StepLR(optimizer, 5, gamma=0.8, last_epoch=-1) # dropping the LR too quickly seems to underfit

    # exp5
    # optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    # scheduler = lr_scheduler.StepLR(optimizer, 10, gamma=0.5, last_epoch=-1)

    # exp7
    # optimizer = optim.SGD(model.parameters(), lr=0.005 / gamma, momentum=0.9)
    # scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[100,200], gamma=0.7)  # dropping the LR too quickly seems to underfit

    #model for validation
    evalmodel = nn.Sequential(model.embedding_net, model.fc,
                              nn.LogSoftmax(dim=1)).to(device)

    print('____________DANet____________')
    print(model)

    #save someting

    model_save_path = 'model/' + folder_name + '/' + comment + '/'
    if (args.save_model):
        if not os.path.isdir(model_save_path):
            os.makedirs(model_save_path)

    if loging:
        fname = model_save_path + datetime.today().strftime(
            "%m_%d_%H_%M") + ".txt"
        f = open(fname, 'w')

    if args.use_tensorboard:
        writer = SummaryWriter(comment=comment)
        writer.add_text('optimizer', str(optimizer))
        writer.add_text('scheduler', str(milestones))
        writer.add_text('model_save_path', model_save_path)
        writer.add_text('model', str(model))
        writer.close()
    # load_model_path = 'C:\\Users\dk\PycharmProjects\giga_cnn\model\deep100_negsubj\\fold_0_g_0.7\danet_0.7_49.pt'
    #'C:\\Users\dk\PycharmProjects\giga_cnn\구모델\\clf_83_8.pt'#'clf_29.pt' #'triplet_mg26.pt'#'clf_triplet2_5.pt' #'triplet_31.pt'
    # load_model_path = 'C:\\Users\dk\PycharmProjects\giga_cnn\model\exp6_basenet\\fold_0_g_0.6\danet_0.6_86.pt'

    if startepoch > 0:
        load_model_path = model_save_path + 'danet_' + str(gamma) + '_' + str(
            startepoch) + '.pt'
        model_save_path = model_save_path + '(cont)'
    else:
        load_model_path = None
    if load_model_path is not None:
        model.load_state_dict(torch.load(load_model_path))

    # for param in model.embedding_net.parameters():
    #     param.requires_grad = False

    epochidx = 1

    for epochidx in range(100):
        fit(triplet_train_loader, triplet_test_loader, model, loss_fn,
            optimizer, scheduler, epochidx, n_epochs, cuda, log_interval)
        print(epochidx)
        train_loss, train_score = eval(args, evalmodel, device, train_loader)
        eval_loss, eval_score = eval(args, evalmodel, device, test_loader)

        if args.use_tensorboard:
            writer.add_scalar('Train/Loss',
                              np.mean(train_loss) / args.batch_size, epochidx)
            writer.add_scalar('Train/Acc',
                              np.mean(train_score) / args.batch_size, epochidx)
            writer.add_scalar('Eval/Loss',
                              np.mean(eval_loss) / args.batch_size, epochidx)
            writer.add_scalar('Eval/Acc',
                              np.mean(eval_score) / args.batch_size, epochidx)
            writer.close()
        if args.save_model:
            torch.save(
                model.state_dict(), model_save_path + 'danet_' + str(gamma) +
                '_' + str(epochidx) + '.pt')
Example #32
    def load_mask(self, img, index):
        imgh, imgw = img.shape[0:2]
        mask_type = self.mask

        # external + random block
        if mask_type == 4:
            mask_type = 1 if np.random.binomial(1, 0.5) == 1 else 3

        # external + random block + half
        elif mask_type == 5:
            mask_type = np.random.randint(1, 4)

        # random block
        if mask_type == 1:
            mask = create_mask(imgw, imgh, imgw // 2, imgh // 2)
            return mask

        if mask_type == 8:
            # print(imgw, imgh)
            # x = random.randint(imgw//4, imgw)
            # y = random.randint(imgh//4, imgh)
            # mask = create_mask(imgw, imgh, x, y)
            # if np.random.binomial(1, 0.1) > 0:
            #     mask = np.ones_like(mask)

            mask = np.ones([imgw, imgh])
            mask = (mask * 255).astype(np.uint8)
            return mask, mask

        # half
        if mask_type == 2:
            # randomly choose right or left
            return create_mask(imgw, imgh, imgw // 2, imgh,
                               0 if random.random() < 0.5 else imgw // 2, 0)

        # external
        if mask_type == 3:
            mask_index = random.randint(0, len(self.mask_data) - 1)
            mask = imread(self.mask_data[mask_index])
            mask = self.resize(mask, imgh, imgw)
            mask = (mask > 0).astype(
                np.uint8) * 255  # threshold due to interpolation
            return mask

        # test mode: load mask non random
        if mask_type == 6:
            mask = imread(self.mask_data[index])
            mask = self.resize(mask, imgh, imgw, centerCrop=False)
            mask = rgb2gray(mask)
            mask = (mask > 0).astype(np.uint8) * 255
            return mask

        if mask_type == 7:
            bbox = np.array(self.data[index]['word_bb'])
            max_pad = np.max([imgh, imgw])
            if self._mask_pad == -1:
                # coefficient = 1
                # pad = coefficient*self._count//self._mask_pad_update_step
                # if pad > np.max(self.input_size+coefficient):
                #     pad = np.random.randint(0, np.max(self.input_size), 1)[0]
                # elif pad == 0:
                #     pad = 0
                # else:
                #     pad = np.random.randint(0, pad)

                if np.random.binomial(1, 0.1) > 0:
                    pad = max_pad
                else:
                    pad = np.random.randint(self._mask_safe_pad,
                                            np.ceil(max_pad / 2))

            elif self._mask_pad == -2:
                # pad = np.random.randint(2, self._mask_pad, 1)[0]
                if self.data[index]['word_percent'] < 5:
                    pad = 20
                elif self.data[index]['word_percent'] < 10:
                    pad = 15
                elif self.data[index]['word_percent'] < 15:
                    pad = 10
                else:
                    pad = 5
            else:
                pad = self._mask_pad

            if not self.training:
                return mask_generation_with_BB([imgh, imgw], bbox, pad), \
                        mask_generation_with_BB([imgh, imgw], bbox, self._mask_safe_pad)

            # return np.ones([imgh, imgw]), mask_generation_with_BB([imgh, imgw], bbox, self._mask_safe_pad)

            nb_instance = bbox.shape[-1]
            # index_selected = np.random.permutation(nb_instance)[:np.random.choice(nb_instance-1)+1]
            index_selected = np.random.permutation(nb_instance)[:nb_instance -
                                                                nb_instance //
                                                                5]
            index_all = np.array(range(nb_instance))
            index_not_selected = np.setxor1d(index_selected, index_all)
            #print(len(index_selected), len(index_not_selected))

            BB_not_selected = bbox[..., index_not_selected]
            BB2_selected = bbox[..., index_selected]
            mask_not_selected = mask_generation_with_BB([imgh, imgw],
                                                        BB_not_selected,
                                                        self._mask_safe_pad)
            mask_selected = mask_generation_with_BB([imgh, imgw], BB2_selected,
                                                    self._mask_safe_pad)
            mask_safe_bbox = np.multiply(mask_selected, 1 - mask_not_selected)

            if pad >= max_pad or np.sum(mask_safe_bbox) == 0:
                return np.ones([imgh, imgw]), mask_generation_with_BB(
                    [imgh, imgw], bbox, self._mask_safe_pad)
            else:
                mask_selected = mask_generation_with_BB([imgh, imgw],
                                                        BB2_selected, pad)
                masks_pad = np.multiply(mask_selected, 1 - mask_not_selected)
                return masks_pad, mask_safe_bbox
Example #33
                print('Time = ', time.time() - t)

                #CHS_gather[ki][di] = CHS
                silh_gather[ki][di] = silh_avg

                # Distribution of cluster sizes.
                ClSz, _ = np.histogram(kmLabels, k)
                ClSz_var[ki][di] = ClSz.var()

                # Show country names that belong to each cluster
                xx = []
                for i in range(k):
                    print('Cluster #', i)
                    inds = np.where(kmLabels == i)
                    inds = inds[0]
                    ninds = np.setxor1d(inds, range(num_countries))

                    #print(countriesLL.name[inds])
                    tradesWithinCluster = trade_ntwrk[np.ix_(inds, inds)]
                    tradesLeavingCluster = trade_ntwrk[np.ix_(inds, ninds)]
                    tradesEnteringCluster = trade_ntwrk[np.ix_(ninds, inds)]
                    tradesOutsideCluster = trade_ntwrk[np.ix_(ninds, ninds)]

                    xx = [
                        tradesWithinCluster[np.nonzero(
                            tradesWithinCluster)].mean(), tradesLeavingCluster[
                                np.nonzero(tradesLeavingCluster)].mean(),
                        tradesEnteringCluster[np.nonzero(
                            tradesEnteringCluster)].mean(),
                        tradesOutsideCluster[np.nonzero(
                            tradesOutsideCluster)].mean(),
Example #34
    def computeTravelTimes(self, slowness, calcOthers=False):
        """Compute the travel times and fill data and time matrix
        for later use of response and Jacobian, respectively.
        For response only active sources are needed, for Jacobian all.
        """
        mesh = self.mesh()
        nNodes = mesh.nodeCount()
        midPoints = self.mesh().cellCenters()
        param_markers = np.unique(mesh.cellMarkers())
        param_count = len(param_markers)
        data = self.data()
        if len(slowness) == mesh.cellCount():
            mesh.setCellAttributes(slowness)
            # self.mapModel(slowness)
        elif len(slowness) == param_count:
            # map the regions in the mesh to slowness
            slow_map = pg.stdMapF_F()
            min_reg_num = min(param_markers)
            for i, si in enumerate(slowness):
                slow_map.insert(float(i + min_reg_num), si)

            mesh.mapCellAttributes(slow_map)
        else:
            raise ValueError("Wrong no of parameters. Mesh size: {}, no "
                             "of regions: {}, and number of slowness values:"
                             "{}".format(mesh.cellCount(), param_count,
                                         len(slowness)))

        times = pg.RVector(nNodes, 0.)
        upTags = np.zeros(nNodes)
        downTags = np.zeros(nNodes)
        sourceIndices = np.unique(data("s"))
        if calcOthers:
            ns = len(sourceIndices)
            geophoneIndices = np.setxor1d(np.arange(data.sensorCount()),
                                          sourceIndices)
            sourceIndices = geophoneIndices
            #            geophoneIndices = np.unique(data("g"))
            if self.debug:
                print("{:d}-{:d}={:d}".format(data.sensorCount(), ns,
                                              len(sourceIndices)))


#        if self.debug:  # resize not working
#            self.solution().resize(self.mesh().nodeCount(), self.nSensors)
#            print(self.solution().rows(), self.solution().cols())
        for iSource in np.array(sourceIndices, dtype=int):
            if self.debug:
                print(iSource)
            # initial condition (reset vectors)
            times *= 0.0
            upTags *= 0
            downTags *= 0
            downwind = set()
            source = data.sensorPosition(int(iSource))
            cell = mesh.findCell(source)
            # fill in nodes around source using local smoothness
            for i, n in enumerate(cell.nodes()):
                times[n.id()] = cell.attribute() * n.pos().distance(source)
                upTags[n.id()] = 1
            for i, n in enumerate(cell.nodes()):
                tmpNodes = pg.commonNodes(n.cellSet())
                for nn in tmpNodes:
                    if not upTags[nn.id()] and not downTags[nn.id()]:
                        downwind.add(nn)
                        downTags[nn.id()] = 1

            while len(downwind) > 0:  # start fast marching
                fastMarch(mesh, downwind, times, upTags, downTags)

            self.dataMatrix[iSource] = pg.interpolate(
                mesh, times, destPos=data.sensorPositions())
            self.timeMatrix[iSource] = pg.interpolate(mesh,
                                                      times,
                                                      destPos=midPoints)

            if self.debug:
                print(self.solution().rows(), self.solution().cols())
                print(len(times), self.mesh())
                self.solution()[int(iSource)] = times
                self.solution().setCol(int(iSource), times)
Example #35
    def generate_clusters(self, density_factor, ligand_file,
                          clustercenter_file):
        """Generate hydration sites from water molecules found in the binding site
        during the simulation. Clustering is done in two steps: i) an initial clustering over 10%
        of the frames, and ii) a refinement step where all frames are used.

        Parameters
        ----------
        ligand_file : string
            Name of the PDB file containing atomic coordinates of the ligand,
            assumed to be co-crystallized with the protein.

        Returns
        -------
        final_cluster_coords : numpy.ndarray
            Coordinates of hydration sites, represented by a 2-D array with shape N x 3,
            where N is the number of hydration sites identified during clustering.

        site_waters : list
            List of N sub-lists, where N is the number of identified hydration sites; each sublist
            consists of a 3-element tuple for every water identified in that site. The first element
            of the tuple is the frame number, the second element is the index of the oxygen atom in
            the original topology, and the third element is the offset index as read from a trimmed
            version of the trajectory used for clustering.

        Notes
        -----
        The following attributes of the object are updated when the clustering is successfully completed.
        self.hsa_region_O_ids:
            The indices of water oxygen atoms in HSA region for each frame are stored
            in the corresponding lists.
        self.hsa_region_flat_ids:
            Same as above except that indices are not atom indices from the topology
            but in a sequence from 0 to N, where N is the total number of water oxygen atoms found in the
            HSA region throughout the simulation.
        self.hsa_region_water_coords:
            An N x 3 numpy array is initialized, where N is the total number of water oxygen atoms found in the
            HSA region throughout the simulation. The array gets populated during individual frame processing.
        """
        sphere_radius = md.utils.in_units_of(1.0, "angstroms", "nanometers")
        topology = md.load_topology(self.topology_file)
        if self.non_water_atom_ids.shape[0] == 0:
            raise Exception(
                ValueError,
                "Clustering is supported only for solute-solvent systems, no solute atoms found."
            )

        ligand = md.load_pdb(ligand_file, no_boxchk=True)
        ligand_coords = ligand.xyz[0, :, :]
        binding_site_atom_indices = np.asarray(
            list(range(ligand_coords.shape[0])))
        init_cluster_coords = None
        # Step 1: Initial Clustering if user didn't provide cluster centers
        if clustercenter_file is None:
            clustering_stride = 10
            print("Reading trajectory for clustering.")
            with md.open(self.trajectory) as f:
                f.seek(self.start_frame)
                # read all frames if no frames specified by user
                if self.num_frames is None:
                    trj_short = f.read_as_traj(
                        topology,
                        atom_indices=np.concatenate(
                            (binding_site_atom_indices,
                             self.wat_oxygen_atom_ids
                             )))[self.start_frame::clustering_stride]
                else:
                    trj_short = f.read_as_traj(
                        topology,
                        atom_indices=np.concatenate((binding_site_atom_indices,
                                                     self.wat_oxygen_atom_ids))
                    )[self.start_frame:self.num_frames:clustering_stride]
                    print(trj_short.n_frames)
                if trj_short.n_frames < 10:
                    sys.exit(
                        "Clustering requires at least 100 frames (i.e. 10 strided frames); "
                        "the current strided trajectory contains {0:d} frames."
                        .format(trj_short.n_frames))
                print("Performing an initial clustering over {0:d} frames.".
                      format(trj_short.n_frames))
                # Obtain water molecules solvating the binding site
                # FIXME: This is a workaround for using the MDTraj compute_neighbors function; the xyz
                # coordinates of the trajectory are modified so that the first n atom coordinates are
                # replaced with the n ligand atom coordinates. Unexpected things will happen if the number
                # of solute atoms is less than the number of ligand atoms, which is highly unlikely.
                coords = trj_short.xyz
                for i_frame in range(trj_short.n_frames):
                    for pseudo_index in range(
                            binding_site_atom_indices.shape[0]):
                        coords[i_frame, pseudo_index, :] = ligand_coords[
                            pseudo_index, :]

                haystack = np.setdiff1d(trj_short.topology.select("all"),
                                        binding_site_atom_indices)
                binding_site_waters = md.compute_neighbors(
                    trj_short,
                    self.hsa_region_radius,
                    binding_site_atom_indices,
                    haystack_indices=haystack)
                # generate a list of tuples, each holding a frame number in trj_short and the corresponding water index
                water_id_frame_list = [(i, nbr)
                                       for i in range(len(binding_site_waters))
                                       for nbr in binding_site_waters[i]]

                # Start initial clustering by building a KDTree and get initial neighbor count for all waters
                water_coordinates = np.ma.array(
                    [coords[wat[0], wat[1], :] for wat in water_id_frame_list],
                    mask=False)
                tree = spatial.cKDTree(water_coordinates)
                nbr_list = tree.query_ball_point(water_coordinates,
                                                 sphere_radius)
                nbr_count_list = np.ma.array([len(nbrs) for nbrs in nbr_list],
                                             mask=False)
                cutoff = trj_short.n_frames * density_factor * 0.1401
                if np.ceil(cutoff) - cutoff <= 0.5:
                    cutoff = np.ceil(cutoff)
                else:
                    cutoff = np.floor(cutoff)
                n_wat = 3 * cutoff

                # Set up clustering loop
                cluster_list = []
                cluster_iter = 0
                while n_wat > cutoff:
                    # Get the water with the most neighbors, retrieve its neighbors, and mark them for exclusion in the next iteration
                    max_index = np.argmax(nbr_count_list)
                    to_exclude = np.array(nbr_list[max_index])
                    # Set current water count to current neighbors plus one for the water itself
                    n_wat = len(to_exclude) + 1

                    # Mask current water, its neighbors so that they are not considered in the next iteration
                    nbr_count_list.mask[to_exclude] = True
                    nbr_count_list.mask[max_index] = True
                    # Mask current waters' and its neighbors' coords so that they are not considered in the next iteration
                    water_coordinates.mask[to_exclude] = True
                    water_coordinates.mask[max_index] = True

                    # Accumulate neighbors for each water in current cluster, removing common neighbors
                    nbrs_of_to_exclude = np.unique(
                        np.array([
                            n_excluded
                            for excluded_nbrs in nbr_list[to_exclude]
                            for n_excluded in excluded_nbrs
                        ]))

                    # Obtain the list of waters whose neighbors need to be updated due to exclusion of the waters above
                    to_update = np.setxor1d(to_exclude, nbrs_of_to_exclude)
                    to_update = np.setdiff1d(to_update, np.asarray(max_index))

                    # Update the neighbor count for each water from the list generated above
                    if to_update.shape[0] != 0:
                        tree = spatial.cKDTree(water_coordinates)
                        updated_nbr_list = tree.query_ball_point(
                            water_coordinates[to_update], sphere_radius)
                        # for each updated member, get its original index and update the original neighbor search list
                        for index, nbrs in enumerate(updated_nbr_list):
                            if not nbr_count_list.mask[to_update[index]]:
                                nbr_count_list[to_update[index]] = len(nbrs)

                    # Check distances against previously identified clusters and skip this water
                    # if it lies within 1.2 A of an existing cluster
                    current_wat = water_id_frame_list[max_index]
                    current_wat_coords = md.utils.in_units_of(
                        coords[current_wat[0], current_wat[1], :],
                        "nanometers", "angstroms")
                    near_flag = 0
                    if len(cluster_list) != 0:
                        for clust in cluster_list:
                            clust_coords = coords[clust[0], clust[1], :]
                            dist = np.linalg.norm(current_wat_coords -
                                                  clust_coords)
                            if dist < 1.20:
                                near_flag += 1
                    if near_flag == 0:
                        cluster_iter += 1
                        cluster_list.append(water_id_frame_list[max_index])
                init_cluster_coords = [
                    coords[cluster[0], cluster[1], :]
                    for cluster in cluster_list
                ]
        else:
            clusters_pdb_file = md.load_pdb(clustercenter_file, no_boxchk=True)
            init_cluster_coords = clusters_pdb_file.xyz[0, :, :]

        # Read full trajectory
        print("Reading trajectory to obtain water molecules for each cluster.")
        with md.open(self.trajectory) as f:
            f.seek(self.start_frame)
            if self.num_frames is None:
                trj = f.read_as_traj(topology,
                                     stride=1,
                                     atom_indices=np.concatenate(
                                         (binding_site_atom_indices,
                                          self.wat_oxygen_atom_ids)))
                self.num_frames = trj.n_frames
            else:
                trj = f.read_as_traj(topology,
                                     n_frames=self.num_frames,
                                     stride=1,
                                     atom_indices=np.concatenate(
                                         (binding_site_atom_indices,
                                          self.wat_oxygen_atom_ids)))
                if trj.n_frames < self.num_frames:
                    print((
                        "Warning: {0:d} frames found in the trajectory, resetting self.num_frames."
                        .format(trj.n_frames)))
                    self.num_frames = trj.n_frames
            for i_frame in range(trj.n_frames):
                for pseudo_index in range(binding_site_atom_indices.shape[0]):
                    trj.xyz[i_frame,
                            pseudo_index, :] = ligand_coords[pseudo_index, :]
            haystack = np.setdiff1d(trj.topology.select("all"),
                                    binding_site_atom_indices)
            start_point = haystack[0]
            binding_site_waters = md.compute_neighbors(
                trj,
                self.hsa_region_radius,
                binding_site_atom_indices,
                haystack_indices=haystack)
            # From the full frame-wise set of waters in the binding site, build two more frame-wise lists:
            # one in which each frame holds the correct topology indices of its waters, and another with a
            # new flat index running from 0 to M, where M is the total number of HSA-region waters minus 1
            start = 0
            for i in range(len(binding_site_waters)):
                self.hsa_region_O_ids.append([])
                self.hsa_region_flat_ids.append([])
                for wat in binding_site_waters[i]:
                    wat_0 = wat - start_point
                    wat_offset = (
                        wat_0 * self.water_sites) + self.wat_oxygen_atom_ids[0]
                    self.hsa_region_O_ids[i].append(wat_offset)
                    self.hsa_region_flat_ids[i].append(start)
                    start += 3

            water_id_frame_list = [(i, nbr)
                                   for i in range(len(binding_site_waters))
                                   for nbr in binding_site_waters[i]]
            water_coordinates = np.array(
                [trj.xyz[wat[0], wat[1], :] for wat in water_id_frame_list])

        # Initialize array that stores coordinates of all water molecules in the HSA region, used for entropy calcs
        self.hsa_region_water_coords = np.zeros(
            (len(water_id_frame_list) * 3, 3), dtype=float)
        tree = spatial.cKDTree(water_coordinates)
        nbr_list = tree.query_ball_point(init_cluster_coords, sphere_radius)
        final_cluster_coords = []
        cutoff = int(self.num_frames * density_factor * 0.1401)
        if np.ceil(cutoff) - cutoff <= 0.5:
            cutoff = np.ceil(cutoff)
        else:
            cutoff = np.floor(cutoff)

        # apply refinement if user-defined clusters were not provided
        if clustercenter_file is None:
            # Step 2: Refinement
            # Initialize variables and data structures
            # Read in the trajectory but only first N solute atoms where N equals the number of ligand atoms
            # plus all water oxygen atoms
            # WARNING: This shifts indices of waters and once they are assigned to clusters, the indices need to
            # be corrected.

            print((
                "Refining initial cluster positions by considering {0:d} frames."
                .format(self.num_frames)))
            # For each cluster, set cluster center equal to geometric center of all waters in the cluster
            site_waters = []
            cluster_index = 1
            for cluster in nbr_list:
                cluster_water_coords = water_coordinates[cluster]
                if len(cluster) > cutoff:
                    near_flag = 0
                    waters_offset = [
                        (water_id_frame_list[wat][0] + self.start_frame,
                         ((water_id_frame_list[wat][1] - start_point) *
                          self.water_sites) + self.wat_oxygen_atom_ids[0])
                        for wat in cluster
                    ]

                    com = np.zeros(3)
                    masses = np.ones(cluster_water_coords.shape[0])
                    masses /= masses.sum()
                    com[:] = water_coordinates[cluster].T.dot(masses)
                    cluster_center = com[:]
                    # Raise flag if the current cluster center is within 1.2 A of existing cluster center
                    for other, coord in enumerate(final_cluster_coords[:-1]):
                        dist = np.linalg.norm(
                            md.utils.in_units_of(cluster_center, "nanometers",
                                                 "angstroms") - coord)
                        if dist < 1.20:
                            near_flag += 1
                    # Only add cluster center if it is at a safe distance from others
                    if near_flag == 0:
                        final_cluster_coords.append(
                            md.utils.in_units_of(cluster_center, "nanometers",
                                                 "angstroms"))
                        site_waters.append(waters_offset)
                        cluster_index += 1
        # otherwise store data for each user defined cluster
        else:
            # For each cluster, set cluster center equal to geometric center of all waters in the cluster
            final_cluster_coords = md.utils.in_units_of(
                init_cluster_coords, "nanometers", "angstroms")
            site_waters = []
            cluster_index = 1
            for cluster in nbr_list:
                waters_offset = [
                    (water_id_frame_list[wat][0] + self.start_frame,
                     ((water_id_frame_list[wat][1] - start_point) *
                      self.water_sites) + self.wat_oxygen_atom_ids[0])
                    for wat in cluster
                ]
                site_waters.append(waters_offset)
                cluster_index += 1

        # Write clustercenter file
        write_watpdb_from_coords("clustercenterfile", final_cluster_coords)
        self.clustercenter_file = "clustercenterfile.pdb"
        print(("Final number of clusters: {0:d}".format(
            len(final_cluster_coords))))
        return np.asarray(final_cluster_coords), site_waters
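# A minimal sketch (plain NumPy, hypothetical indices) of the neighbour
# bookkeeping in the initial clustering loop above: the waters whose neighbour
# counts must be recomputed are those that neighbour an excluded water without
# being excluded themselves, i.e. the symmetric difference of the two index sets.
import numpy as np

to_exclude = np.array([2, 5, 7])                    # waters joining the cluster
nbrs_of_to_exclude = np.array([1, 2, 3, 5, 7, 9])   # union of their neighbours
to_update = np.setxor1d(to_exclude, nbrs_of_to_exclude)
to_update = np.setdiff1d(to_update, np.asarray(2))  # drop the seed water (hypothetical id 2)
print(to_update)  # [1 3 9]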
Exemplo n.º 36
0
    def _train__(self):
        # Init pop and calculate fitness
        pop = [self._create_solution__(minmax=0) for _ in range(self.pop_size)]

        # Find the pathfinder
        pop = sorted(pop, key=lambda temp: temp[self.ID_FIT])
        g_best = deepcopy(pop[0])
        gbest_present = deepcopy(g_best)

        for i in range(self.epoch):
            alpha, beta = np.random.uniform(1, 2, 2)
            A = np.random.uniform(self.domain_range[0],
                                  self.domain_range[1]) * np.exp(
                                      -2 * (i + 1) / self.epoch)

            ## Update the position of pathfinder and check the bound
            temp = gbest_present[self.ID_POS] + 2 * np.random.uniform() * (
                gbest_present[self.ID_POS] - g_best[self.ID_POS]) + A
            temp = self._amend_solution_and_return__(temp)
            fit = self._fitness_model__(temp)
            g_best = deepcopy(gbest_present)
            if fit < gbest_present[self.ID_FIT]:
                gbest_present = [temp, fit]
            pop[0] = deepcopy(gbest_present)

            ## Update positions of members, check the bound and calculate new fitness
            for j in range(1, self.pop_size):
                temp1 = deepcopy(pop[j][self.ID_POS])

                t1 = beta * np.random.uniform() * (gbest_present[self.ID_POS] -
                                                   temp1)
                my_list_idx = np.setxor1d(np.array(range(1, self.pop_size)),
                                          np.array([j]))
                idx = np.random.choice(my_list_idx)
                dist = np.linalg.norm(pop[idx][self.ID_POS] - temp1)
                t2 = alpha * np.random.uniform() * (pop[idx][self.ID_POS] -
                                                    temp1)
                t3 = np.random.uniform(
                    self.domain_range[0], self.domain_range[1],
                    self.problem_size) * (1 -
                                          (i + 1) * 1.0 / self.epoch) * dist
                temp1 += t1 + t2 + t3

                ## Update members
                temp1 = self._amend_solution_and_return__(temp1)
                fit = self._fitness_model__(temp1)
                if fit < pop[j][self.ID_FIT]:
                    pop[j] = [temp1, fit]

            ## Update the best solution found so far (current pathfinder)
            pop = sorted(pop, key=lambda temp: temp[self.ID_FIT])
            current_best = deepcopy(pop[self.ID_MIN_PROBLEM])
            if current_best[self.ID_FIT] < gbest_present[self.ID_FIT]:
                gbest_present = deepcopy(current_best)

            self.loss_train.append(gbest_present[self.ID_FIT])
            if self.print_train:
                print("Generation : {0}, best result so far: {1}".format(
                    i + 1, gbest_present[self.ID_FIT]))

        return gbest_present[self.ID_FIT], self.loss_train
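# A minimal sketch (plain NumPy) of the partner selection used above: member j
# interacts with a randomly chosen member other than itself; index 0 (the
# pathfinder) is excluded as well, mirroring range(1, pop_size).
import numpy as np

pop_size, j = 10, 4
candidates = np.setxor1d(np.arange(1, pop_size), np.array([j]))
print(candidates)  # [1 2 3 5 6 7 8 9]
print(np.random.choice(candidates))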
Exemplo n.º 37
0
def main():
    try:
        import sklearn

        if sklearn.__version__ < "0.20":
            gs.fatal("Package python3-scikit-learn 0.20 or newer is not installed")

    except ImportError:
        gs.fatal("Package python3-scikit-learn 0.20 or newer is not installed")

    try:
        import pandas as pd
        
    except ImportError:
        gs.fatal("Package python3-pandas 0.25 or newer is not installed")

    # parser options ---------------------------------------------------------------------------------------------------
    group = options["group"]
    training_map = options["training_map"]
    training_points = options["training_points"]
    field = options["field"]
    model_save = options["save_model"]
    model_name = options["model_name"]
    hyperparams = {
        "penalty": options["penalty"],
        "alpha": options["alpha"],
        "l1_ratio": options["l1_ratio"],
        "C": options["c"],
        "epsilon": options["epsilon"],
        "min_samples_leaf": options["min_samples_leaf"],
        "n_estimators": options["n_estimators"],
        "learning_rate": options["learning_rate"],
        "subsample": options["subsample"],
        "max_depth": options["max_depth"],
        "max_features": options["max_features"],
        "n_neighbors": options["n_neighbors"],
        "weights": options["weights"],
        "hidden_layer_sizes": options["hidden_units"],
    }
    cv = int(options["cv"])
    group_raster = options["group_raster"]
    importances = flags["f"]
    preds_file = options["preds_file"]
    classif_file = options["classif_file"]
    fimp_file = options["fimp_file"]
    param_file = options["param_file"]
    norm_data = flags["s"]
    random_state = int(options["random_state"])
    load_training = options["load_training"]
    save_training = options["save_training"]
    n_jobs = int(options["n_jobs"])
    balance = flags["b"]
    category_maps = option_to_list(options["category_maps"])

    # define estimator -------------------------------------------------------------------------------------------------
    hyperparams, param_grid = process_param_grid(hyperparams)
    estimator, mode = predefined_estimators(
        model_name, random_state, n_jobs, hyperparams
    )

    # remove dict keys that are incompatible for the selected estimator
    estimator_params = estimator.get_params()
    param_grid = {
        key: value for key, value in param_grid.items() if key in estimator_params
    }
    scoring, search_scorer = scoring_metrics(mode)

    # checks of input options ------------------------------------------------------------------------------------------
    if (
        mode == "classification"
        and balance is True
        and model_name not in check_class_weights()
    ):

        gs.warning(model_name + " does not support class weights")
        balance = False

    if mode == "regression" and balance is True:
        gs.warning("Balancing of class weights is only possible for classification")
        balance = False

    if classif_file:
        if cv <= 1:
            gs.fatal(
                "Output of cross-validation global accuracy requires cross-validation cv > 1"
            )
        if not os.path.exists(os.path.dirname(classif_file)):
            gs.fatal("Directory for output file {} does not exist".format(classif_file))

    # feature importances require a recent scikit-learn version and a valid output directory
    if importances:
        if sklearn.__version__ < "0.22":
            gs.fatal("Feature importances calculation requires scikit-learn version >= 0.22")

    if fimp_file:
        if importances is False:
            gs.fatal('Output of feature importance requires the "f" flag to be set')
        if not os.path.exists(os.path.dirname(fimp_file)):
            gs.fatal("Directory for output file {} does not exist".format(fimp_file))

    # predictions file selected but no cross-validation scheme used
    if preds_file:
        if cv <= 1:
            gs.fatal(
                "Output of cross-validation predictions requires cross-validation cv > 1"
            )
        if not os.path.exists(os.path.dirname(preds_file)):
            gs.fatal("Directory for output file {} does not exist".format(preds_file))

    # define RasterStack -----------------------------------------------------------------------------------------------
    stack = RasterStack(group=group)

    if category_maps is not None:
        stack.categorical = category_maps

    # extract training data --------------------------------------------------------------------------------------------
    if load_training != "":
        X, y, cat, class_labels, group_id = load_training_data(load_training)

        if class_labels is not None:
            a = pd.DataFrame({"response": y, "labels": class_labels})
            a = a.drop_duplicates().values
            class_labels = {k: v for (k, v) in a}

    else:
        gs.message("Extracting training data")

        if group_raster != "":
            stack.append(group_raster)

        if training_map != "":
            X, y, cat = stack.extract_pixels(training_map)
            y = y.flatten()

            with RasterRow(training_map) as src:
                class_labels = {v: k for (k, v, m) in src.cats}

                if "" in class_labels.values():
                    class_labels = None

        elif training_points != "":
            X, y, cat = stack.extract_points(training_points, field)
            y = y.flatten()
            
            if y.dtype == np.object_:
                from sklearn.preprocessing import LabelEncoder
                le = LabelEncoder()
                y = le.fit_transform(y)
                class_labels = {k: v for (k, v) in enumerate(le.classes_)}
            else:
                class_labels = None

        # take group id from last column and remove from predictors
        if group_raster != "":
            group_id = X[:, -1]
            X = np.delete(X, -1, axis=1)
            stack.drop(group_raster)
        else:
            group_id = None

        # check for labelled pixels and training data
        if y.shape[0] == 0 or X.shape[0] == 0:
            gs.fatal(
                "No training pixels or pixels in imagery group "
                "...check computational region"
            )

        from sklearn.utils import shuffle

        if group_id is None:
            X, y, cat = shuffle(X, y, cat, random_state=random_state)
        else:
            X, y, cat, group_id = shuffle(
                X, y, cat, group_id, random_state=random_state
            )

        if save_training != "":
            save_training_data(
                save_training, X, y, cat, class_labels, group_id, stack.names
            )

    # cross validation settings ----------------------------------------------------------------------------------------
    # inner resampling method (cv=2)
    from sklearn.model_selection import GridSearchCV, StratifiedKFold, GroupKFold, KFold

    if any(param_grid) is True:
        if group_id is None and mode == "classification":
            inner = StratifiedKFold(n_splits=2, random_state=random_state)
        elif group_id is None and mode == "regression":
            inner = KFold(n_splits=2, random_state=random_state)
        else:
            inner = GroupKFold(n_splits=2)
    else:
        inner = None

    # outer resampling method (cv=cv)
    if cv > 1:
        if group_id is None and mode == "classification":
            outer = StratifiedKFold(n_splits=cv, random_state=random_state)
        elif group_id is None and mode == "regression":
            outer = KFold(n_splits=cv, random_state=random_state)
        else:
            outer = GroupKFold(n_splits=cv)

    # modify estimators that take sample_weights -----------------------------------------------------------------------
    if balance is True:
        from sklearn.utils import compute_class_weight

        class_weights = compute_class_weight(class_weight="balanced", classes=(y), y=y)
        fit_params = {"sample_weight": class_weights}

    else:
        class_weights = None
        fit_params = {}

    # preprocessing ----------------------------------------------------------------------------------------------------
    from sklearn.pipeline import Pipeline
    from sklearn.compose import ColumnTransformer
    from sklearn.preprocessing import StandardScaler, OneHotEncoder

    # standardization
    if norm_data is True and category_maps is None:
        scaler = StandardScaler()
        trans = ColumnTransformer(
            remainder="passthrough",
            transformers=[("scaling", scaler, np.arange(0, stack.count))],
        )

    # one-hot encoding
    elif norm_data is False and category_maps is not None:
        enc = OneHotEncoder(handle_unknown="ignore", sparse=False)
        trans = ColumnTransformer(
            remainder="passthrough", transformers=[("onehot", enc, stack.categorical)]
        )

    # standardization and one-hot encoding
    elif norm_data is True and category_maps is not None:
        scaler = StandardScaler()
        enc = OneHotEncoder(handle_unknown="ignore", sparse=False)
        trans = ColumnTransformer(
            remainder="passthrough",
            transformers=[
                ("onehot", enc, stack.categorical),
                ("scaling", scaler, np.setxor1d(
                    range(stack.count), stack.categorical).astype('int')),
            ],
        )

    # combine transformers
    if norm_data is True or category_maps is not None:
        estimator = Pipeline([("preprocessing", trans), ("estimator", estimator)])
        param_grid = wrap_named_step(param_grid)
        fit_params = wrap_named_step(fit_params)

    if any(param_grid) is True:
        estimator = GridSearchCV(
            estimator=estimator,
            param_grid=param_grid,
            scoring=search_scorer,
            n_jobs=n_jobs,
            cv=inner,
        )

    # estimator training -----------------------------------------------------------------------------------------------
    gs.message(os.linesep)
    gs.message(("Fitting model using " + model_name))
    if balance is True and group_id is not None:
        estimator.fit(X, y, groups=group_id, **fit_params)
    elif balance is True and group_id is None:
        estimator.fit(X, y, **fit_params)
    else:
        estimator.fit(X, y)

    # message best hyperparameter setup and optionally save using pandas
    if any(param_grid) is True:
        gs.message(os.linesep)
        gs.message("Best parameters:")

        optimal_pars = [
            (k.replace("estimator__", "").replace("selection__", "") + " = " + str(v))
            for (k, v) in estimator.best_params_.items()
        ]

        for i in optimal_pars:
            gs.message(i)

        if param_file != "":
            param_df = pd.DataFrame(estimator.cv_results_)
            param_df.to_csv(param_file)

    # cross-validation -------------------------------------------------------------------------------------------------
    if cv > 1:
        from sklearn.metrics import classification_report
        from sklearn import metrics

        if (
            mode == "classification"
            and cv > np.histogram(y, bins=np.unique(y))[0].min()
        ):
            gs.message(os.linesep)
            gs.fatal(
                "Number of cv folds is greater than number of "
                "samples in some classes"
            )

        gs.message(os.linesep)
        gs.message("Cross validation global performance measures......:")

        if (
            mode == "classification"
            and len(np.unique(y)) == 2
            and all([0, 1] == np.unique(y))
        ):
            scoring["roc_auc"] = metrics.roc_auc_score

        from sklearn.model_selection import cross_val_predict

        preds = cross_val_predict(
            estimator, X, y, group_id, cv=outer, n_jobs=n_jobs, fit_params=fit_params
        )

        test_idx = [test for train, test in outer.split(X, y)]
        n_fold = np.zeros((0,))

        for fold in range(outer.get_n_splits()):
            n_fold = np.hstack((n_fold, np.repeat(fold, test_idx[fold].shape[0])))

        preds = {"y_pred": preds, "y_true": y, "cat": cat, "fold": n_fold}

        preds = pd.DataFrame(data=preds, columns=["y_pred", "y_true", "cat", "fold"])
        gs.message(os.linesep)
        gs.message("Global cross validation scores...")
        gs.message(os.linesep)
        gs.message("Metric \t Mean \t Error")

        for name, func in scoring.items():
            score_mean = (
                preds.groupby("fold")
                .apply(lambda x: func(x["y_true"], x["y_pred"]))
                .mean()
            )

            score_std = (
                preds.groupby("fold")
                .apply(lambda x: func(x["y_true"], x["y_pred"]))
                .std()
            )

            gs.message(
                name + "\t" + str(score_mean.round(3)) + "\t" + str(score_std.round(3))
            )

        if mode == "classification":
            gs.message(os.linesep)
            gs.message("Cross validation class performance measures......:")

            report_str = classification_report(
                y_true=preds["y_true"],
                y_pred=preds["y_pred"],
                sample_weight=class_weights,
                output_dict=False,
            )

            report = classification_report(
                y_true=preds["y_true"],
                y_pred=preds["y_pred"],
                sample_weight=class_weights,
                output_dict=True,
            )
            report = pd.DataFrame(report)

            gs.message(report_str)

            if classif_file != "":
                report.to_csv(classif_file, mode="w", index=True)

        # write cross-validation predictions to csv file
        if preds_file != "":
            preds.to_csv(preds_file, mode="w", index=False)
            text_file = open(preds_file + "t", "w")
            text_file.write('"Real", "Real", "integer", "integer"')
            text_file.close()

    # feature importances ----------------------------------------------------------------------------------------------
    if importances is True:
        from sklearn.inspection import permutation_importance

        fimp = permutation_importance(
            estimator,
            X,
            y,
            scoring=search_scorer,
            n_repeats=5,
            n_jobs=n_jobs,
            random_state=random_state,
        )

        feature_names = deepcopy(stack.names)
        feature_names = [i.split("@")[0] for i in feature_names]

        fimp = pd.DataFrame(
            {
                "feature": feature_names,
                "importance": fimp["importances_mean"],
                "std": fimp["importances_std"],
            }
        )

        gs.message(os.linesep)
        gs.message("Feature importances")
        gs.message("Feature" + "\t" + "Score")

        for index, row in fimp.iterrows():
            gs.message(
                row["feature"] + "\t" + str(row["importance"]) + "\t" + str(row["std"])
            )

        if fimp_file != "":
            fimp.to_csv(fimp_file, index=False)

    # save the fitted model
    import joblib

    joblib.dump((estimator, y, class_labels), model_save)
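# A minimal sketch (plain NumPy, hypothetical raster counts) of the column
# split used for the combined preprocessing step above: the categorical band
# indices go to one-hot encoding and their set complement goes to scaling.
import numpy as np

n_bands = 6                     # hypothetical stack.count
categorical = np.array([1, 4])  # hypothetical stack.categorical
continuous = np.setxor1d(np.arange(n_bands), categorical).astype('int')
print(continuous)  # [0 2 3 5]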
Exemplo n.º 38
0
import numpy as np

a = np.array([1, 2, 3])
b = np.array([3, 4, 5])

print("a: ", a)
print("b: ", b)

print("1차원 합집합: ", np.union1d(a, b))
print("1차원 교집합: ", np.intersect1d(a, b))
print("1차원 차집합: ", np.setdiff1d(a, b))
print("1차원 대칭차집합(합집합 - 교집합): ", np.setxor1d(a, b))

print("난수 발생(1 개): ", np.random.random(1))
print("난수 발생(3 개): ", np.random.random(3))

print("평균: 0 / 표준편자: 1 / 갯수: 1")
print("난수 발생(1 개): ", np.random.normal(0, 1, 1))

print("평균: 1 / 표준편자: 1 / 갯수: 3")
print("난수 발생(3 개): ", np.random.normal(1, 1, 3))

# 출처: http://expert0226.tistory.com/357 [여름나라겨울이야기]
Exemplo n.º 39
0
    def __getitem__(self, i):

        for n, (image_path, label_path) in enumerate(
                zip(
                    self.image_path_list[i * self.batch_size:(i + 1) *
                                         self.batch_size],
                    self.label_path_list[i * self.batch_size:(i + 1) *
                                         self.batch_size])):

            image = cv2.imread(image_path, 1)
            label = cv2.imread(label_path, 0)
            labels = np.unique(label)

            if self.blur and random.randint(0, 1):
                image = cv2.GaussianBlur(image, (self.blur, self.blur), 0)

            if self.resize_shape and not self.crop_shape:
                image = cv2.resize(image, self.resize_shape)
                label = cv2.resize(label,
                                   self.resize_shape,
                                   interpolation=cv2.INTER_NEAREST)

            if self.crop_shape:
                image, label = _random_crop(image, label, self.crop_shape)

            # Do augmentation
            if self.horizontal_flip and random.randint(0, 1):
                image = cv2.flip(image, 1)
                label = cv2.flip(label, 1)
            if self.vertical_flip and random.randint(0, 1):
                image = cv2.flip(image, 0)
                label = cv2.flip(label, 0)
            if self.brightness:
                factor = 1.0 + random.gauss(mu=0.0, sigma=self.brightness)
                if random.randint(0, 1):
                    factor = 1.0 / factor
                table = np.array([((i / 255.0)**factor) * 255
                                  for i in np.arange(0, 256)]).astype(np.uint8)
                image = cv2.LUT(image, table)
            if self.rotation:
                angle = random.gauss(mu=0.0, sigma=self.rotation)
            else:
                angle = 0.0
            if self.zoom:
                scale = random.gauss(mu=1.0, sigma=self.zoom)
            else:
                scale = 1.0
            if self.rotation or self.zoom:
                M = cv2.getRotationMatrix2D(
                    (image.shape[1] // 2, image.shape[0] // 2), angle, scale)
                image = cv2.warpAffine(image, M,
                                       (image.shape[1], image.shape[0]))
                label = cv2.warpAffine(label, M,
                                       (label.shape[1], label.shape[0]))

            if self.histeq:  # and convert to RGB
                img_yuv = cv2.cvtColor(image, cv2.COLOR_BGR2YUV)
                img_yuv[:, :, 0] = clahe.apply(img_yuv[:, :, 0])
                image = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR)  # to BGR

            label = label.astype('int32')
            for j in np.setxor1d(np.unique(label), labels):
                label[label == j] = self.n_classes

            y = label.flatten()
            y[y > (self.n_classes - 1)] = self.n_classes

            self.Y[n] = np.expand_dims(y, -1)
            self.F[n] = (self.Y[n] != 0).astype(
                'float32')  # get all pixels that aren't background
            valid_pixels = self.F[n][
                self.Y[n] != self.
                n_classes]  # get all pixels (bg and foreground) that aren't void
            u_classes = np.unique(valid_pixels)
            class_weights = class_weight.compute_class_weight(
                'balanced', u_classes, valid_pixels)
            class_weights = {
                class_id: w
                for class_id, w in zip(u_classes, class_weights)
            }
            if len(class_weights) == 1:  # no bg\no fg
                if 1 in u_classes:
                    class_weights[0] = 0.
                else:
                    class_weights[1] = 0.
            elif not len(class_weights):
                class_weights[0] = 0.
                class_weights[1] = 0.

            sw_valid = np.ones(y.shape)
            np.putmask(sw_valid, self.Y[n] == 0,
                       class_weights[0])  # background weights
            np.putmask(sw_valid, self.F[n],
                       class_weights[1])  # foreground weights
            np.putmask(sw_valid, self.Y[n] == self.n_classes, 0)
            self.F_SW[n] = sw_valid
            self.X[n] = image

            # Create adaptive pixels weights
            filt_y = y[y != self.n_classes]
            u_classes = np.unique(filt_y)
            if len(u_classes):
                class_weights = class_weight.compute_class_weight(
                    'balanced', u_classes, filt_y)
                class_weights = {
                    class_id: w
                    for class_id, w in zip(u_classes, class_weights)
                }
            class_weights[self.n_classes] = 0.
            for yy in u_classes:
                np.putmask(self.SW[n], y == yy, class_weights[yy])

            np.putmask(self.SW[n], y == self.n_classes, 0)

        sample_dict = {'pred_mask': self.SW}
        return self.X, self.Y, sample_dict
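# A minimal sketch (plain NumPy) of the label clean-up above: any class id that
# appears after resizing/warping but was not present in the original label
# image is remapped to the void class n_classes.
import numpy as np

n_classes = 21
original_labels = np.array([0, 3, 7])   # labels present before augmentation
label = np.array([[0, 3], [7, 9]])      # 9 introduced by interpolation
for j in np.setxor1d(np.unique(label), original_labels):
    label[label == j] = n_classes
print(label)  # the spurious 9 becomes 21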
Exemplo n.º 40
0
    def __init__(self,
                 folder='/workspace/datasets/',
                 mode='train',
                 n_classes=21,
                 batch_size=1,
                 resize_shape=None,
                 validation_split=.1,
                 seed=7,
                 crop_shape=(640, 320),
                 horizontal_flip=True,
                 blur=0,
                 vertical_flip=0,
                 brightness=0.1,
                 rotation=5.0,
                 zoom=0.1,
                 do_ahisteq=True):

        self.blur = blur
        self.histeq = do_ahisteq
        self.image_path_list = sorted(
            glob.glob(os.path.join(folder, 'JPEGImages', 'train', '*')))
        self.label_path_list = sorted(
            glob.glob(os.path.join(folder, 'SegmentationClassAug', '*')))

        np.random.seed(seed)

        n_images_to_select = round(
            len(self.image_path_list) * validation_split)
        x = np.random.permutation(len(
            self.image_path_list))[:n_images_to_select]
        if mode == 'train':
            x = np.setxor1d(x, np.arange(len(self.image_path_list)))

        self.image_path_list = [self.image_path_list[j] for j in x]
        self.label_path_list = [self.label_path_list[j] for j in x]

        if mode == 'test':
            self.image_path_list = sorted(
                glob.glob(os.path.join(folder, 'JPEGImages', 'test',
                                       '*')))[:100]

        self.mode = mode
        self.n_classes = n_classes
        self.batch_size = batch_size
        self.resize_shape = resize_shape
        self.crop_shape = crop_shape
        self.horizontal_flip = horizontal_flip
        self.vertical_flip = vertical_flip
        self.brightness = brightness
        self.rotation = rotation
        self.zoom = zoom
        # Preallocate memory
        if self.crop_shape:
            self.X = np.zeros((batch_size, crop_shape[1], crop_shape[0], 3),
                              dtype='float32')
            self.SW = np.zeros((batch_size, crop_shape[1] * crop_shape[0]),
                               dtype='float32')
            self.Y = np.zeros((batch_size, crop_shape[1] * crop_shape[0], 1),
                              dtype='float32')
            self.F = np.zeros((batch_size, crop_shape[1] * crop_shape[0], 1),
                              dtype='float32')
            self.F_SW = np.zeros((batch_size, crop_shape[1] * crop_shape[0]),
                                 dtype='float32')
        elif self.resize_shape:
            self.X = np.zeros(
                (batch_size, resize_shape[1], resize_shape[0], 3),
                dtype='float32')
            self.SW = np.zeros((batch_size, resize_shape[1] * resize_shape[0]),
                               dtype='float32')
            self.Y = np.zeros(
                (batch_size, resize_shape[1] * resize_shape[0], 1),
                dtype='float32')
            self.F = np.zeros(
                (batch_size, resize_shape[1] * resize_shape[0], 1),
                dtype='float32')
            self.F_SW = np.zeros(
                (batch_size, resize_shape[1] * resize_shape[0]),
                dtype='float32')
        else:
            raise Exception('No image dimensions specified!')
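# A minimal sketch (plain NumPy) of the train/validation split above: a random
# subset of indices is drawn for validation, and the training indices are its
# complement with respect to the full index range.
import numpy as np

np.random.seed(7)
n_images = 10
n_val = round(n_images * 0.1)
val_idx = np.random.permutation(n_images)[:n_val]
train_idx = np.setxor1d(val_idx, np.arange(n_images))
print(val_idx, train_idx)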
Exemplo n.º 41
0
    def within_index(self, *args, **kwargs):
        A = self.A.within_index(*args, **kwargs)
        B = self.B.within_index(*args, **kwargs)
        return setxor1d(A, B, assume_unique=True)
Exemplo n.º 42
0
def get_not_indexes(a, indices):
    """only works for 1D"""
    ia = np.indices(a.shape)
    not_indices = np.setxor1d(ia, indices)
    return not_indices
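# Hypothetical usage sketch (assuming numpy is imported as np, as the function
# above already requires): the complement of a set of positions in a 1-D array.
import numpy as np

a = np.arange(6)
print(get_not_indexes(a, np.array([1, 4])))  # [0 2 3 5]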
Exemplo n.º 43
0
'''
@Author: Sankar
@Date: 2021-04-14 08:49:25
@Last Modified by: Sankar
@Last Modified time: 2021-04-14 08:55:09
@Title : Numpy_Python-14
'''
'''
Write a Python program to find the set exclusive-or of two arrays. Set exclusive-or
will return the sorted, unique values that are in only one (not both) of the input arrays.
Array1: [ 0 10 20 40 60 80]
Array2: [10, 30, 40, 50, 70]
Unique values that are in only one (not both) of the input arrays:
[ 0 20 30 50 60 70 80]
'''
import numpy as np
arr1 = np.array([0, 10, 20, 40, 60, 80])
arr2 = np.array([10, 30, 40, 50, 70])

print(np.setxor1d(arr1, arr2))
Exemplo n.º 44
0
# returns the values together with their indices
ans = s1[~s1.isin(s2)]

# returns only the values
ans2 = np.setdiff1d(s1, s2, assume_unique=False)
print(ans)

# In[20]:

# 7. Get the non-overlapping elements of two Series objects

s1 = pd.Series([1, 2, 3, 4, 5])
s2 = pd.Series([4, 5, 6, 7, 8])

# returns the values together with their indices

# build the union Series without duplicates
s_union = pd.Series(np.union1d(s1, s2))
# get the intersecting values
s_intersect = pd.Series(np.intersect1d(s1, s2))
# select everything except the intersecting values
ans = s_union[~s_union.isin(s_intersect)]

# returns only the values
ans2 = np.setxor1d(s1, s2, assume_unique=False)

print(ans)

# In[ ]:
Exemplo n.º 45
0
from math import sqrt

import numpy as np


def disjoint_sets(subtraceA, subtraceB):
    x = len(subtraceA.pages) - len(subtraceB.pages)
    y = len(np.setxor1d(subtraceA.pages, subtraceB.pages))
    return sqrt(x**2 + y**2)
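# Hypothetical usage sketch: the subtrace arguments are assumed to expose a
# `pages` sequence; the distance mixes the page-count difference with the size
# of the symmetric difference of the page sets.
from collections import namedtuple

Subtrace = namedtuple("Subtrace", "pages")
print(disjoint_sets(Subtrace([1, 2, 3]), Subtrace([2, 3, 4, 5])))  # sqrt(1 + 9)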
def generate_data(cur_target_class_ids,
                  full_target_class_ids,
                  data,
                  labels,
                  refining,
                  seed=123):

    np.random.seed(seed)

    num_of_each_class = 5000

    n_final_classes_including_other = 10
    full_class_ids = np.arange(1, n_final_classes_including_other + 1)

    if refining == True:
        other_class_ids = np.setxor1d(full_class_ids, cur_target_class_ids)
    else:
        other_class_ids = np.setxor1d(full_class_ids, full_target_class_ids)

    labels_copy = np.copy(labels)

    target_class_indices = np.array([], dtype='int32')
    for class_id in cur_target_class_ids:
        target_class_indices = np.append(
            target_class_indices,
            np.argwhere(labels_copy == class_id - 1)[:, 0])
    # TODO: temp code
    np.random.shuffle(target_class_indices)
    print(target_class_indices[:10])
    # create new train and test datasets and labels
    target_class_data = data[target_class_indices, :]
    target_class_labels = np.squeeze(labels_copy[target_class_indices, :])
    target_class_labels_copy = np.copy(target_class_labels)

    # Reassign the indices of the target classes, starting from 1
    for i, class_id in enumerate(cur_target_class_ids):
        target_class_labels[np.argwhere(target_class_labels_copy == class_id -
                                        1)[:, 0]] = i + 1

    # Other-class
    all_other_class_indices = np.array([], dtype='int32')
    for class_id in other_class_ids:
        if class_id == other_class_ids[0]:  #TODO
            all_other_class_indices = np.append(
                all_other_class_indices,
                np.argwhere(labels_copy == class_id - 1)[:, 0])

    print('current other class: ' + str(other_class_ids))
    print('current target class: ' + str(cur_target_class_ids))
    print('all target target class: ' + str(full_target_class_ids))
    print('all class: ' + str(full_class_ids))
    print('all other class indices: ' + str(len(all_other_class_indices)))
    other_class_indices = np.random.choice(all_other_class_indices,
                                           num_of_each_class)

    other_class_data = data[other_class_indices, :]

    # set 'other' label to zero
    other_class_labels = np.array([0] * num_of_each_class)

    print(target_class_labels.shape, other_class_labels.shape)

    selected_data = np.concatenate((target_class_data, other_class_data))
    selected_labels = np.concatenate((target_class_labels, other_class_labels))

    return (selected_data, selected_labels)
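# A minimal sketch (plain NumPy) of the "other"-class selection above: the
# classes that are not targets are the set complement of the target ids within
# the full class id range.
import numpy as np

full_class_ids = np.arange(1, 11)
cur_target_class_ids = np.array([1, 2, 3])
print(np.setxor1d(full_class_ids, cur_target_class_ids))  # [ 4  5  6  7  8  9 10]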
Exemplo n.º 47
0
#%% data preparation
# load the matlab data
earth_data = sio.loadmat('head.mat')
faaut = np.array(earth_data['faaut']).reshape(-1).astype('int')
faman = np.array(earth_data['faman']).reshape(-1).astype('int')
off = np.array(earth_data['off']).reshape(-1).astype('int')
rx = np.array(earth_data['rx']).reshape(-1).astype('int')
ry = np.array(earth_data['ry']).reshape(-1).astype('int')
sx = np.array(earth_data['sx']).reshape(-1).astype('int')
sy = np.array(earth_data['sy']).reshape(-1).astype('int')

# separate the uncertain and certain data indices
uct_idx = np.where(faman < 0)[0]  # uncertain index
uct_pct = len(uct_idx) * 1.0 / len(faman)  # uncertain percentage
idx_all = np.arange(len(faman))
cet_idx = np.setxor1d(idx_all, uct_idx)  # certain index
#
all_data = np.zeros((len(faaut), 7))
all_data[:, 0] = off
all_data[:, 1] = rx
all_data[:, 2] = ry
all_data[:, 3] = sx
all_data[:, 4] = sy
all_data[:, 5] = faaut
all_data[:, 6] = faman
all_data_df = pd.DataFrame(
    all_data, columns=['off', 'rx', 'ry', 'sx', 'sy', 'faaut', 'faman'])
#
certain_data = all_data[cet_idx, :]
uncertain_data = all_data[uct_idx, :]
#
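# A minimal sketch (plain NumPy) of the certain/uncertain split above: because
# uct_idx is a subset of idx_all, np.setxor1d acts here as a set difference.
import numpy as np

faman_demo = np.array([3, -1, 2, -1, 5])
uct = np.where(faman_demo < 0)[0]
cet = np.setxor1d(np.arange(len(faman_demo)), uct)
print(uct, cet)  # [1 3] [0 2 4]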
Exemplo n.º 48
0
def kmeans(path):
    folders = os.listdir(
        path)  # Build a list with the names of all the folders inside the path
    folders = np.asarray(folders)
    files = [
    ]  # initialize a list for the names of the files belonging to each folder

    for f in folders:
        provisional_path = path + "/" + f  # Initialize a variable with the previous value of 'path' plus the name of a folder 'x' inside StopSearch_2011_2017
        n_classes = get_n_classes(provisional_path)
        files.append([provisional_path, n_classes])

    files = np.asarray(files)
    count = 0
    file_index = 0

    #results=[]
    final_tags = []

    for i in range(0, len(files)):
        file_results = []
        prom_folds = []
        for K in range(3, 9):
            print("i: " + str(i) + ", K: " + str(K))
            file_name = folders[i]
            X = np.asarray(get_data(files[i][0]))
            X = soft_max(X)
            #print(X)
            #time.sleep(5)
            file_tags = get_tags(files[i][0])

            X_shape = np.shape(X)
            k_index = np.random.choice(
                int(X_shape[0]), int(K), replace=False
            )  # Select K random indices of elements from the original matrix "X"
            centroids = X[k_index, :]  # Assign random centroids
            matriz_indx = matriz_indices(len(X), len(centroids))
            matriz_distancias = distances_matrix(X, centroids)
            sort_index(matriz_distancias, matriz_indx)

            clusters = iniciar_clusters(X, matriz_indx, K)
            old_centroids = centroids
            new_centroids = calculate_new_centroids(centroids, clusters)
            j = 0
            while (olds_vs_news(old_centroids, new_centroids)):
                matriz_indx = matriz_indices(len(X), len(centroids))
                matriz_distancias = distances_matrix(X, centroids)
                sort_index(matriz_distancias, matriz_indx)
                clusters = iniciar_clusters(X, matriz_indx, K)
                old_centroids = centroids
                new_centroids = calculate_new_centroids(centroids, clusters)
                centroids = new_centroids
                j += 1
            #file_results.append(db_index(np.asarray(centroids),np.asarray(clusters),len(X)))
            #time.sleep(5)
            print("\n\n-------------------------------------\n\n")
            for element in matriz_indx:
                final_tags.append(element[0])

            data = X

            #time.sleep(5)

            k_folds = 5
            all_index = np.arange(0, len(data))
            min_limit, max_limit = 0, mt.ceil(len(data) / float(k_folds))
            indices_list = np.arange(min_limit, max_limit)
            radios_list = radios(centroids)
            Y = norm_Y(file_tags)
            const_limit = mt.ceil(len(data) / float(k_folds))
            cont_mcc = 0
            for kf in range(0, k_folds):
                print("fold: " + str(kf + 1))
                #print(str(min_limit)+","+str(max_limit))
                fold = np.arange(int(min_limit), int(max_limit))
                #print(fold)
                i_proof = data[fold, :]
                i_proof_y = Y[fold]
                i_proof = np.asarray(i_proof)
                i_proof_y = np.asarray(i_proof_y)
                train_index = np.setxor1d(fold, all_index)
                i_train = data[train_index, :]
                i_train_y = Y[train_index]

                phi_1 = []
                phi_1.append(
                    np.reshape(np.kron(np.ones((len(i_train), 1)), [1]),
                               (len(i_train))))  #ADD BIAS

                for cn in range(0, len(centroids)):
                    phi_1.append(RBF(i_train, centroids[cn], radios_list[cn]))
                phi_1 = np.asarray(phi_1)
                phi_1 = np.linalg.pinv(phi_1)
                phi_1 = np.transpose(phi_1)
                #print(i_train_y)
                m_target = Ytarget(Y, i_train_y)
                """print(m_target)
                print("len: "+str(len(m_target)))
                time.sleep(5)"""
                W = M_1xM_2(phi_1, m_target)
                W = np.transpose(W)
                phi_2 = []
                phi_2.append(
                    np.reshape(np.kron(np.ones((len(i_proof), 1)), [1]),
                               (len(i_proof))))
                for cn in range(0, len(centroids)):
                    phi_2.append(RBF(i_proof, centroids[cn], radios_list[cn]))
                phi_2 = np.asarray(phi_2)
                phi_2 = np.transpose(phi_2)

                y_net = []
                #np_unhs=list(i_proof_y)
                len_set = len(list(set(Y)))
                #print(len_set)
                for d in range(0, len(phi_2)):
                    x = phi_2[d]
                    x = x.reshape(1, len(x))
                    x = np.transpose(x)
                    z = M_1xM_2(W, x)
                    x_tags = matriz_indices(1, len_set)
                    z = np.transpose(z)
                    sort_index(z, x_tags)
                    y_net.append(x_tags[0][len(x_tags) - 1])
                y_net = np.asarray(y_net)
                cont_mcc += matthews_corrcoef(i_proof_y, y_net)
                min_limit = max_limit
                max_limit += const_limit
                if (max_limit > len(data)):
                    max_limit = len(data) - 1
            prom_folds.append(cont_mcc / 5)

        # PLOT PROM_FOLDS at this level
        prom_folds = np.asarray(prom_folds)
        labels_k = ["k-3", "k-4", "k-5", "k-6", "k-7", "k-8"]
        #labels_x=[]
        index_mcc = np.arange(len(labels_k))
        #print("shape folds")
        #print(np.shape(prom_folds))
        #print(prom_folds)
        time.sleep(5)
        plt.subplots(figsize=(9, 6))
        plt.xticks(index_mcc, labels_k, rotation="vertical")
        #plt.yticks(axis_y,labels_y)

        n = 0
        plt.plot(prom_folds,
                 color="r",
                 marker="o",
                 linestyle='--',
                 label="MCC")
        plt.title(folders[i])
        plt.savefig(folders[i] + ".png")
        #plt.show()
        plt.close()
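# A minimal sketch (plain NumPy, hypothetical sizes) of the fold split above:
# the training indices of each fold are the complement of that fold's test
# indices within the full index range.
import numpy as np

n_samples, k_folds = 10, 5
all_index = np.arange(n_samples)
fold_size = n_samples // k_folds
for kf in range(k_folds):
    fold = np.arange(kf * fold_size, (kf + 1) * fold_size)
    train_index = np.setxor1d(fold, all_index)
    print(kf, fold, train_index)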
Exemplo n.º 49
0
#  Write a NumPy program to find the set exclusive-or of two arrays. Set exclusive-or will return the sorted, unique values that are in only one (not both) of the input arrays.

import numpy as np
array1=np.array([0,10,20,40,60,80])
print("array1:\n",array1)
array2=np.array([10, 30, 40, 50, 70, 90])
print("array1:\n",array2)
print("resultant array\n",np.setxor1d(array1,array2))
Exemplo n.º 50
0
import numpy as np

arr = np.array([1, 1, 1, 2, 3, 4, 5, 5, 6, 7])
x = np.unique(arr)
print(x)

arr1 = np.array([1, 2, 3, 4])
arr2 = np.array([3, 4, 5, 6])
newarr = np.union1d(arr1, arr2)
print(newarr)

arr1 = np.array([1, 2, 3, 4])
arr2 = np.array([3, 4, 5, 6])
newarr = np.intersect1d(arr1, arr2, assume_unique=True)
print(newarr)

set1 = np.array([1, 2, 3, 4])
set2 = np.array([3, 4, 5, 6])
newarr = np.setdiff1d(set1, set2, assume_unique=True)
print(newarr)

set1 = np.array([1, 2, 3, 4])
set2 = np.array([3, 4, 5, 6])
newarr = np.setxor1d(set1, set2, assume_unique=True)
print(newarr)
Exemplo n.º 51
0
total = round(sum(cargas) / 3)  # The target value for the sum of each group.
numero_grupos = 3

# Create the groups

resultados = [None] * numero_grupos

for i in range(1, numero_grupos):

    data = grupos_suma(cargas, total)
    seen = set()
    result = []
    for d in data:  # Remove groups that are merely permutations of another group.
        if frozenset(d) not in seen:
            result.append(d)
            seen.add(frozenset(d))

    resultados[i] = result[
        0]  # Many groups satisfying the target sum are generated; only the first one is kept

    cargas = np.setxor1d(list(cargas),
                         resultados[i])  # Loads not yet assigned to a group.

print()
print(f'Loads: {cargas_original}')
print()
print("Groups:")

for i in range(1, numero_grupos):
    print(resultados[i])
    print(f'Sum: {round(sum(resultados[i]), 2)}')
Exemplo n.º 52
0
def setminus(a, b):
    """Return the unique values of a that are not present in b (set difference)."""
    a = np.array(a)
    b = np.array(b)
    intersect = np.intersect1d(a, b)
    diff = np.setxor1d(a, intersect)
    return diff
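
# For reference (not part of the original snippet): because the intersection is
# always a subset of the unique values of a, this helper matches NumPy's
# built-in set difference for 1-D inputs, e.g.
#   np.array_equal(setminus([1, 2, 2, 3], [2]), np.setdiff1d([1, 2, 2, 3], [2]))  # True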
Exemplo n.º 53
0
# Set operations
if __name__ == '__main__':
    # 5.6 Unique values and set logic
    names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
    np.unique(names)  # unique values, returned sorted

    # Set functions
    values = np.array([6, 0, 0, 3, 2, 5, 6])
    np.in1d(values, [2, 3, 6])  # is each element of values contained in [2, 3, 6]?
    # returns array([ True, False, False,  True,  True, False,  True], dtype=bool)
    np.unique(values)  # unique elements
    np.intersect1d(values, values)  # intersection
    np.union1d(values, values)  # union
    np.setdiff1d(values, values)  # set difference (first minus second)
    np.setxor1d(values, values)  # symmetric difference
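
    # Note (added here): in newer NumPy versions, np.isin is the recommended,
    # shape-preserving replacement for np.in1d:
    np.isin(values, [2, 3, 6])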


# II. Advanced NumPy usage
# 1. Checking an ndarray's data type
if __name__ == '__main__':
    ints = np.ones(10, dtype=np.uint16)
    floats = np.ones(10, dtype=np.float32)
    np.issubdtype(ints.dtype, np.integer)  # check the data type
    np.issubdtype(floats.dtype, np.floating)

    np.float64.mro()  # all parent classes of np.float64


# 2. Reshaping
if __name__ == '__main__':
Exemplo n.º 54
0
from array import array
import numpy as np

# create a typed array and inspect it
arr = array("i", [1, 5, 9, 6, 5, 4, 8])
print(type(arr), arr)

# append values at the end of the array (np.append returns a new ndarray)
arr = np.append(arr, [55, 80])
print(arr)

# intersection of two arrays
array1 = np.array([1, 5, 8, 9, 6, 2, 4])
array2 = np.array([8, 9, 45, 65])
print(np.intersect1d(array1, array2))

# difference of two arrays (values in array1 that are not in array2)
array1 = np.array([1, 5, 8, 9, 6, 2, 4])
array2 = np.array([8, 9, 45, 65])
print(np.setdiff1d(array1, array2))

# symmetric difference of two arrays (values in exactly one of them)
array1 = np.array([1, 5, 8, 9, 6, 2, 4])
array2 = np.array([8, 9, 45, 65])
print(np.setxor1d(array1, array2))

# compare two arrays element-wise
a = [1, 2]
b = [4, 5]
print(np.greater_equal(a, b))
print(np.greater(a, b))
print(np.less_equal(a, b))
print(np.less(a, b))
Exemplo n.º 55
0
def findDuplicateVectors(vec, tol=vTol, equivPM=False):
    """
    Find vectors in an array that are equivalent to within
    a specified tolerance

      USAGE:

          eqv, uid = findDuplicateVectors(vec, tol, equivPM)

      INPUT:

          1) vec is n x m, a double array of m horizontally concatenated
                           n-dimensional vectors.
         *2) tol is 1 x 1, a scalar tolerance.  If not specified, the default
                           tolerance is 1e-14.
         *3) set equivPM to True if vec and -vec
             are to be treated as equivalent

      OUTPUT:

          1) eqv is 1 x p, a list of p equivalence relationships.
          2) uid is a list of the column indices of the unique vectors (one
             representative from each duplicate group plus all singletons).

      NOTES:

          Each equivalence relationship is a 1 x q vector of indices that
          represent the locations of duplicate columns/entries in the array
          vec.  For example:

                | 1     2     2     2     1     2     7 |
          vec = |                                       |
                | 2     3     5     3     2     3     3 |

          eqv = [[1x2 double]    [1x3 double]], where

          eqv[0] = [0  4]
          eqv[1] = [1  3  5]
    """

    vlen = vec.shape[1]
    vlen0 = vlen
    orid = np.asarray(list(range(vlen)), dtype="int")

    torid = orid.copy()
    tvec = vec.copy()

    eqv = []
    eqvTot = 0
    uid = 0

    ii = 1
    while vlen > 1 and ii < vlen0:
        dupl = np.tile(tvec[:, 0], (vlen, 1))

        if not equivPM:
            diff = abs(tvec - dupl.T).sum(0)
            match = abs(diff[1:]) <= tol  # logical to find duplicates
        else:
            diffn = abs(tvec - dupl.T).sum(0)
            matchn = abs(diffn[1:]) <= tol
            diffp = abs(tvec + dupl.T).sum(0)
            matchp = abs(diffp[1:]) <= tol
            match = matchn + matchp

        kick = np.hstack([True, match])  # pick self too

        if kick.sum() > 1:
            eqv += [torid[kick].tolist()]
            eqvTot = np.hstack([eqvTot, torid[kick]])
            uid = np.hstack([uid, torid[kick][0]])

        cmask = np.ones((vlen, ))
        cmask[kick] = 0
        cmask = cmask != 0

        tvec = tvec[:, cmask]

        torid = torid[cmask]

        vlen = tvec.shape[1]

        ii += 1

    if len(eqv) == 0:
        eqvTot = []
        uid = []
    else:
        eqvTot = eqvTot[1:].tolist()
        uid = uid[1:].tolist()

    # find all single-instance vectors
    singles = np.sort(np.setxor1d(eqvTot, list(range(vlen0))))

    # now construct list of unique vector column indices
    uid = np.int_(np.sort(np.union1d(uid, singles))).tolist()
    # make sure is a 1D list
    if not hasattr(uid, '__len__'):
        uid = [uid]

    return eqv, uid
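
# A minimal usage sketch reproducing the docstring example above (tol is passed
# explicitly because the module-level default vTol is defined elsewhere):
#
#   vec = np.array([[1., 2., 2., 2., 1., 2., 7.],
#                   [2., 3., 5., 3., 2., 3., 3.]])
#   eqv, uid = findDuplicateVectors(vec, tol=1e-14)
#   # eqv -> [[0, 4], [1, 3, 5]]   columns 0/4 and 1/3/5 are duplicates
#   # uid -> [0, 1, 2, 6]          indices of the unique columns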
Exemplo n.º 56
0
    def arithmetical_operation(self, a, operation, in_place=False):
        """
        Performs given arithmetical operation with :math:`a` operand, the
        operation can be either performed on a copy or in-place.

        Parameters
        ----------
        a : numeric or ndarray or Signal
            Operand.
        operation : object
            Operation to perform.
        in_place : bool, optional
            Operation happens in place.

        Returns
        -------
        Signal
            Continuous signal.

        Examples
        --------
        Adding a single *numeric* variable:

        >>> range_ = np.linspace(10, 100, 10)
        >>> signal_1 = Signal(range_)
        >>> print(signal_1)
        [[   0.   10.]
         [   1.   20.]
         [   2.   30.]
         [   3.   40.]
         [   4.   50.]
         [   5.   60.]
         [   6.   70.]
         [   7.   80.]
         [   8.   90.]
         [   9.  100.]]
        >>> print(signal_1.arithmetical_operation(10, '+', True))
        [[   0.   20.]
         [   1.   30.]
         [   2.   40.]
         [   3.   50.]
         [   4.   60.]
         [   5.   70.]
         [   6.   80.]
         [   7.   90.]
         [   8.  100.]
         [   9.  110.]]

        Adding an *array_like* variable:

        >>> a = np.linspace(10, 100, 10)
        >>> print(signal_1.arithmetical_operation(a, '+', True))
        [[   0.   30.]
         [   1.   50.]
         [   2.   70.]
         [   3.   90.]
         [   4.  110.]
         [   5.  130.]
         [   6.  150.]
         [   7.  170.]
         [   8.  190.]
         [   9.  210.]]

        Adding a :class:`colour.continuous.Signal` class:

        >>> signal_2 = Signal(range_)
        >>> print(signal_1.arithmetical_operation(signal_2, '+', True))
        [[   0.   40.]
         [   1.   70.]
         [   2.  100.]
         [   3.  130.]
         [   4.  160.]
         [   5.  190.]
         [   6.  220.]
         [   7.  250.]
         [   8.  280.]
         [   9.  310.]]
        """

        operation, ioperator = {
            '+': (add, iadd),
            '-': (sub, isub),
            '*': (mul, imul),
            '/': (div, idiv),
            '**': (pow, ipow)
        }[operation]

        if in_place:
            if isinstance(a, Signal):
                self[self._domain] = operation(self._range, a[self._domain])
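                # Domain values present in only one of the two signals have no
                # counterpart to operate with, so they are set to NaN below.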
                exclusive_or = np.setxor1d(self._domain, a.domain)
                self[exclusive_or] = np.full(exclusive_or.shape, np.nan)
            else:
                self.range = ioperator(self.range, a)

            return self
        else:
            copy = ioperator(self.copy(), a)

            return copy
Exemplo n.º 57
0
# set arrays should only be 1-d arrays.
import numpy as np

x1 = np.array([1, 1, 1, 3, 4, 4, 6, 6, 8, 4, 3, 5, 7, 3, 2, 5, 6])
x2 = np.unique(x1)  # finding unique elements from set of array
print(x2)

x3 = np.array([1, 1, 1, 3, 4, 4, 6, 6])
x4 = np.array([8, 4, 3, 5, 7, 3, 2, 5, 6])
x5 = np.union1d(x3, x4)  # finding unique elements from both arrays
print(x5)

x6 = np.array([1, 1, 1, 3, 4, 4, 6, 6])
x7 = np.array([8, 4, 3, 5, 7, 3, 2, 5, 6])
# assume_unique=True would skip de-duplication and speed things up, but it is
# only valid when both inputs are already unique, which is not the case here
x8 = np.intersect1d(x6, x7)
print(x8)  # values common to both arrays

y1 = np.array([1, 1, 1, 3, 4, 4, 6, 6])
y2 = np.array([8, 4, 3, 5, 7, 3, 2, 5, 6])
y3 = np.setdiff1d(y1, y2)  # values of the first set that are not in the second
print(y3)

y4 = np.array([1, 1, 1, 3, 4, 4, 6, 6])
y5 = np.array([8, 4, 3, 5, 7, 3, 2, 5, 6])
y6 = np.setxor1d(y4, y5)  # values present in exactly one of the two sets
print(y6)
Exemplo n.º 58
0
def pointsFromShapes(shapes,
                     bounds,
                     dx=10.0,
                     nmax=None,
                     Nsamp=None,
                     touch_center=True):
    """Get yes/no points from shapefile input - same as sampleFromShapes but without class balance or separation of test and train, only samples in box enclosing the polygons

    :param shapes:
       Sequence of projected shapes.
    :param bounds:
        Tuple of xmin, ymin, xmax, ymax, in lat/lon coordinates, only will accept points from within these bounds
    :param dx:
       resolution of sampling in X and Y (meters), must be a round number of meters
    :param nmax:
      if not None, maximum allowed number of mesh points in X and Y together (nrows*ncols).  Overrides dx.
    :param Nsamp:
      if not None, maximum number of total samples, keeps proportion of yes's and no's the same
    :param touch_center:
      Boolean indicating whether presence of polygon in each grid cell is enough to turn that
      into a yes pixel.  Setting this to false presumes that the dx is relatively large, such
      that creating a grid at that resolution will not tax the resources of the system.
    :returns:
      - sequence of coordinates in lat/lon for: YesPoints, NoPoints
      - numpy array of mesh column centers
      - numpy array of mesh row centers
      - sequence of projected shapes
      - PyProj object defining orthographic projection of xy points

    """
    xmin, ymin, xmax, ymax = bounds
    shptype = shapes[0]['geometry']['type']
    if shptype not in ['Polygon']:
        raise Exception('Only polygon data types supported!')

    #Get the shapes projected into an orthographic projection centered on the data
    pshapes, proj = getProjectedShapes(shapes, xmin, xmax, ymin, ymax)

    # Get the projected bounds
    project = partial(pyproj.transform,
                      pyproj.Proj(proj='latlong', datum='WGS84'), proj)
    bbPoly = Polygon(((xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin)))
    bbPolyproj = transform(project, bbPoly)

    if Nsamp is not None:  # Recompute dx, underestimate by dividing by 1.5 so later trimming doesn't reduce desired total
        projbounds = bbPolyproj.bounds
        dx = np.round(
            np.sqrt(((projbounds[2] - projbounds[0]) *
                     (projbounds[3] - projbounds[1])) / (Nsamp)) / 1.5)

    #get the "yes" sample points
    yespoints, nrows, ncols, xvar, yvar, yesidx = getYesPoints(
        pshapes, proj, dx, nmax=nmax, touch_center=touch_center)

    # sampleNo but with taking all of the points instead of just some of them randomly
    allidx = np.arange(0,
                       len(xvar) *
                       len(yvar))  # flattened array of all indices in mesh
    noidx = np.setxor1d(allidx, yesidx)  # all mesh indices that are not "yes" indices
    rowidx, colidx = np.unravel_index(noidx, (len(yvar), len(xvar)))
    nopoints = []
    for row, col in zip(rowidx, colidx):
        xp = xvar[col]
        yp = yvar[row]
        nopoints.append((xp, yp))
    nopoints = np.array(nopoints)

    # Only accept points inside the bounds
    bbPath = mplPath.Path(
        (list(zip(*np.array(bbPolyproj.exterior.coords.xy)))))
    yespoints = yespoints[bbPath.contains_points(yespoints)]
    nopoints = nopoints[bbPath.contains_points(nopoints)]
    totalpoints = (len(nopoints) + len(yespoints))

    if Nsamp is not None and totalpoints > Nsamp:
        ratioyes = float(len(yespoints)) / totalpoints
        keepy = int(np.round(ratioyes * Nsamp))
        indy = np.random.randint(0, len(yespoints), size=keepy)
        indn = np.random.randint(0, len(nopoints), size=int(Nsamp) - keepy)
        yespoints = yespoints[indy, :]
        nopoints = nopoints[indn, :]

    elif Nsamp is not None and totalpoints < Nsamp:
        print((
            'Only collected %1.0f points out of desired %1.0f points due to bound restrictions'
            % (totalpoints, Nsamp)))

    #project all of the point data sets back to lat/lon
    yespoints = projectBack(yespoints, proj)
    nopoints = projectBack(nopoints, proj)

    return (yespoints, nopoints, xvar, yvar, pshapes, proj)
Exemplo n.º 59
0
import numpy as np
try:
    array = np.array([0, 10, 20, 40, 60, 80])
    print(array)
    array1 = np.array([10, 30, 40, 50, 70, 90])
    print(array1)
    array3 = np.setxor1d(array, array1)
    print("The array obtained after exclusive or operation is: ", array3)
except:
    print("Syntax error")
Exemplo n.º 60
0
def plot_heavy_bars(ax,
                    h_attr_arr,
                    h_clients_arr,
                    nh_attr_arr,
                    nh_clients_arr,
                    xlabel,
                    xulim,
                    yulim,
                    title,
                    width=0.4,
                    xllim=0,
                    yllim=0,
                    conv_xaxis=False,
                    pct_label=True,
                    xlabel_ra=False):
    """
    Given an axis, the heavy/non-heavy attribute and client-count arrays, and
    graph variables, prepare a grouped bar chart.

    Params:
    ax (axis): axis for the plot
    h_attr_arr (array): heavy attribute array
    h_clients_arr (array): heavy number of clients array
    nh_attr_arr (array): non-heavy attribute array
    nh_clients_arr (array): non-heavy number of clients array
    xlabel (string): x-axis label
    xulim (number): upper limit for x-axis
    yulim (number): upper limit for y-axis
    title (string): title for the plot    
    width (number): width of bars (default 0.4)
    xllim (number): lower limit for x-axis (default 0)
    yllim (number): lower limit for y-axis (default 0)
    conv_xaxis (boolean): convert the x-axis arrays to number (default False)
    pct_label (boolean): add the percent label on top of the bar (default True)
    xlabel_ra (boolean): align the xlabel to the right, for long labels (default False)
    """

    if (conv_xaxis):
        xllim = 1
        if (len(h_attr_arr) != len(nh_attr_arr)):
            print('Mismatch in xarray lengths!')
            # setxor1d yields the categories present in only one of the two arrays
            extras = np.setxor1d(h_attr_arr, nh_attr_arr)
            for item in extras:
                print(item)
                if item in nh_attr_arr:  # extra item in non-heavy array
                    nhidx = nh_attr_arr.tolist().index(item)
                    print('Delete ' + item + ' from non-heavy array')
                    nh_attr_arr = np.delete(nh_attr_arr, nhidx)
                    nh_clients_arr = np.delete(nh_clients_arr, nhidx)
                else:  # extra item in heavy array
                    hidx = h_attr_arr.tolist().index(item)
                    print('Insert ' + item + ' into non-heavy array')
                    nh_attr_arr = np.insert(nh_attr_arr, hidx, item)
                    nh_clients_arr = np.insert(nh_clients_arr, hidx, 0)

        xticks = np.arange(1, len(h_attr_arr) + 1)
        ax.set_xticks(xticks)
        if (xlabel_ra):
            ax.set_xticklabels(h_attr_arr, rotation=45, ha='right')
        else:
            ax.set_xticklabels(h_attr_arr, rotation=45)
        h_attr_arr = xticks
        nh_attr_arr = xticks

    # the bar chart
    nhbars = ax.bar(nh_attr_arr - width / 2,
                    nh_clients_arr,
                    width,
                    label='Non Heavy',
                    color='lightsalmon')
    hbars = ax.bar(h_attr_arr + width / 2,
                   h_clients_arr,
                   width,
                   label='Heavy',
                   color='mediumaquamarine')
    ax.set_xlabel(xlabel)
    ax.set_xlim(xllim - 1, xulim)
    ax.set_ylabel('Number of Clients')
    ax.yaxis.grid(True,
                  linestyle='-',
                  which='major',
                  color='lightgrey',
                  alpha=0.5)
    ax.set_ylim(yllim, yulim)
    ax.set_title(title)
    ax.legend(loc='best')
    yrange = yulim - yllim

    if (pct_label):
        # Add percent labels above the heavy bars
        for hrect, nhrect in zip(hbars, nhbars):
            xloc = hrect.get_x()
            if (xloc >= xllim and xloc <= (xulim - 1)):
                hheight = hrect.get_height()
                nhheight = nhrect.get_height()
                htext = hheight * 1.0 / (hheight + nhheight) * 100
                ax.text(xloc + hrect.get_width() / 2.0,
                        hheight + .01 * yrange,
                        '{0:,.2f}%'.format(htext),
                        ha='center',
                        va='bottom',
                        rotation='vertical',
                        fontsize=8)

    return None
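
# A minimal, hypothetical usage sketch (the attribute/count arrays below are
# illustrative only and not taken from the original code base):
#
#   import numpy as np
#   import matplotlib.pyplot as plt
#
#   fig, ax = plt.subplots(figsize=(9, 6))
#   plot_heavy_bars(ax,
#                   h_attr_arr=np.array(['Mon', 'Tue', 'Wed']),
#                   h_clients_arr=np.array([12, 18, 9]),
#                   nh_attr_arr=np.array(['Mon', 'Tue', 'Wed', 'Thu']),
#                   nh_clients_arr=np.array([30, 25, 40, 5]),
#                   xlabel='Visit day', xulim=5, yulim=50,
#                   title='Heavy vs. non-heavy clients', conv_xaxis=True)
#   plt.show()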