def test_hierarchical_dev_expansion():
    """Test that hierarchical dev expansion is correct"""
    game = rsgame.empty([9, 16], [4, 3])
    mask = [True, False, True, False, False, True, False]
    profs = hr.expand_deviation_profiles(game, mask, [3, 4])
    actual = utils.axis_to_elem(profs)
    expected = utils.axis_to_elem([
        [6, 3, 0, 0, 0, 16, 0],
        [3, 3, 3, 0, 0, 16, 0],
        [0, 3, 6, 0, 0, 16, 0],
        [6, 0, 0, 3, 0, 16, 0],
        [3, 0, 3, 3, 0, 16, 0],
        [0, 0, 6, 3, 0, 16, 0],
        [9, 0, 0, 0, 4, 12, 0],
        [6, 0, 3, 0, 4, 12, 0],
        [3, 0, 6, 0, 4, 12, 0],
        [0, 0, 9, 0, 4, 12, 0],
        [9, 0, 0, 0, 0, 12, 4],
        [6, 0, 3, 0, 0, 12, 4],
        [3, 0, 6, 0, 0, 12, 4],
        [0, 0, 9, 0, 0, 12, 4],
    ])
    assert np.setxor1d(actual, expected).size == 0

    profs = hr.expand_deviation_profiles(game, mask, [3, 4], 0)
    actual = utils.axis_to_elem(profs)
    expected = utils.axis_to_elem([
        [6, 3, 0, 0, 0, 16, 0],
        [3, 3, 3, 0, 0, 16, 0],
        [0, 3, 6, 0, 0, 16, 0],
        [6, 0, 0, 3, 0, 16, 0],
        [3, 0, 3, 3, 0, 16, 0],
        [0, 0, 6, 3, 0, 16, 0],
    ])
    assert np.setxor1d(actual, expected).size == 0
def test_rand_dpr_allow_incomplete(add_prob, num_obs, game_desc):
    """Test that allow_incomplete works for random games"""
    # Generate games
    players, strategies, red_players = game_desc
    base = rsgame.BaseGame(players, strategies)
    game = gamegen.add_profiles(base, add_prob)
    sgame = gamegen.add_noise(game, 1, num_obs)
    red = reduction.DeviationPreserving(strategies, players, red_players)

    # Try to reduce game
    red_game = red.reduce_game(game, True)
    red_sgame = red.reduce_game(sgame, True)

    # Verify that when allow_incomplete, then reduce returns all profiles
    reduced_full_profiles = utils.axis_to_elem(
        red.reduce_profiles(game.profiles))
    reduced_profiles = utils.axis_to_elem(red_game.profiles)
    assert np.setxor1d(reduced_profiles, reduced_full_profiles).size == 0
    reduced_sample_profiles = utils.axis_to_elem(red_sgame.profiles)
    assert np.setxor1d(reduced_sample_profiles,
                       reduced_full_profiles).size == 0

    redord = np.argsort(reduced_profiles)
    redsord = np.argsort(reduced_sample_profiles)
    assert np.all(np.isnan(red_game.payoffs[redord]) ==
                  np.isnan(red_sgame.payoffs[redsord])), \
        "sample game and game didn't have same nan payoffs"
    assert all(np.all(np.isnan(p).any(-1) == np.isnan(p).all(-1))
               for p in red_sgame.sample_payoffs), \
        "some sample payoffs had partial nans"
def test_twins_dev_expansion():
    """Test that dpr dev expansion is correct

    Note, this is the only one that has "new" code, so it's the most
    important to test."""
    game = rsgame.empty([9, 16], [4, 3])
    mask = [True, False, True, False, False, True, False]
    profs = tr.expand_deviation_profiles(game, mask)
    actual = utils.axis_to_elem(profs)
    expected = utils.axis_to_elem([
        [8, 1, 0, 0, 0, 16, 0],
        [0, 1, 8, 0, 0, 16, 0],
        [8, 0, 0, 1, 0, 16, 0],
        [0, 0, 8, 1, 0, 16, 0],
        [9, 0, 0, 0, 1, 15, 0],
        [5, 0, 4, 0, 1, 15, 0],
        [0, 0, 9, 0, 1, 15, 0],
        [9, 0, 0, 0, 0, 15, 1],
        [5, 0, 4, 0, 0, 15, 1],
        [0, 0, 9, 0, 0, 15, 1],
    ])
    assert np.setxor1d(actual, expected).size == 0

    profs = tr.expand_deviation_profiles(game, mask, role_index=0)
    actual = utils.axis_to_elem(profs)
    expected = utils.axis_to_elem([
        [8, 1, 0, 0, 0, 16, 0],
        [0, 1, 8, 0, 0, 16, 0],
        [8, 0, 0, 1, 0, 16, 0],
        [0, 0, 8, 1, 0, 16, 0],
    ])
    assert np.setxor1d(actual, expected).size == 0
def test_identity_dev_expansion():
    """Test that identity dev expansion is correct"""
    game = rsgame.empty([3, 4], [4, 3])
    mask = [True, False, True, False, False, True, False]
    profs = ir.expand_deviation_profiles(game, mask)
    actual = utils.axis_to_elem(profs)
    expected = utils.axis_to_elem([
        [2, 1, 0, 0, 0, 4, 0],
        [1, 1, 1, 0, 0, 4, 0],
        [0, 1, 2, 0, 0, 4, 0],
        [2, 0, 0, 1, 0, 4, 0],
        [1, 0, 1, 1, 0, 4, 0],
        [0, 0, 2, 1, 0, 4, 0],
        [3, 0, 0, 0, 1, 3, 0],
        [2, 0, 1, 0, 1, 3, 0],
        [1, 0, 2, 0, 1, 3, 0],
        [0, 0, 3, 0, 1, 3, 0],
        [3, 0, 0, 0, 0, 3, 1],
        [2, 0, 1, 0, 0, 3, 1],
        [1, 0, 2, 0, 0, 3, 1],
        [0, 0, 3, 0, 0, 3, 1],
    ])
    assert np.setxor1d(actual, expected).size == 0

    profs = ir.expand_deviation_profiles(game, mask, role_index=0)
    actual = utils.axis_to_elem(profs)
    expected = utils.axis_to_elem([
        [2, 1, 0, 0, 0, 4, 0],
        [1, 1, 1, 0, 0, 4, 0],
        [0, 1, 2, 0, 0, 4, 0],
        [2, 0, 0, 1, 0, 4, 0],
        [1, 0, 1, 1, 0, 4, 0],
        [0, 0, 2, 1, 0, 4, 0],
    ])
    assert np.setxor1d(actual, expected).size == 0
def test_dpr(keep_prob, game_desc):
    """Simple test that dpr functions are consistent"""
    players, strategies, red_players = game_desc
    # Create game and reduction
    game = gamegen.role_symmetric_game(players, strategies)
    game = gamegen.drop_profiles(game, keep_prob)
    sgame = gamegen.add_noise(game, 1, 3)
    red = reduction.DeviationPreserving(strategies, players, red_players)

    # Try to reduce game
    assert rsgame.basegame_copy(game) == red.full_game
    assert red.reduce_game(rsgame.basegame_copy(game)) == red.red_game
    red_game = red.reduce_game(game)
    red_game2 = reduction.reduce_game_dpr(game, red_players)
    red_sgame = red.reduce_game(sgame)

    # Assert that reduce_game_dpr produces identical results
    reduced_profiles = utils.axis_to_elem(red_game.profiles)
    reduced_profiles2 = utils.axis_to_elem(red_game2.profiles)
    assert np.setxor1d(reduced_profiles, reduced_profiles2).size == 0, \
        "different reduction functions didn't produce identical results"

    # Assert that reducing all profiles covers reduced game
    reduced_full_profiles = utils.axis_to_elem(
        red.reduce_profiles(game.profiles))
    assert np.setdiff1d(reduced_profiles, reduced_full_profiles).size == 0, \
        "reduced game contained profiles it shouldn't have"
    reduced_sample_profiles = utils.axis_to_elem(red_sgame.profiles)
    assert np.setdiff1d(reduced_sample_profiles,
                        reduced_full_profiles).size == 0, \
        "reduced sample game contained profiles it shouldn't have"
    assert np.setxor1d(reduced_sample_profiles,
                       reduced_profiles).size == 0, \
        "reduced sample game and reduced game had different profiles"

    # Assert that all contributing profiles are in the expansion of the
    # reduced game
    full_profiles = utils.axis_to_elem(game.profiles)
    full_reduced_profiles = utils.axis_to_elem(
        red.expand_profiles(red_game.profiles))
    assert np.setdiff1d(full_reduced_profiles, full_profiles).size == 0, \
        "full game did not have data for all profiles required of reduced"
    full_reduced_sample_profiles = utils.axis_to_elem(
        red.expand_profiles(red_sgame.profiles))
    assert np.setdiff1d(full_reduced_sample_profiles,
                        full_profiles).size == 0, \
        ("full sample game did not have data for all profiles required of "
         "reduced")
    assert np.setxor1d(full_reduced_profiles,
                       full_reduced_sample_profiles).size == 0, \
        "sample game didn't produce identical results"
def removeTransits(time, flux, period, epoch, duration):
    halfDur = 0.5 * duration / 24.
    bad = np.where(time < epoch - period + halfDur)[0]
    for p in np.arange(epoch, time[-1] + period, period):
        bad = np.append(bad, np.where((p - halfDur < time) &
                                      (time < p + halfDur))[0])
    good = np.setxor1d(range(len(time)), bad)
    return time[good], flux[good]
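# A minimal usage sketch for removeTransits above (not from the original
# source). It assumes time is in days and duration in hours, which is what
# the duration/24 conversion suggests; the period, epoch and duration values
# are made up for illustration.
import numpy as np

time = np.linspace(0.0, 10.0, 2000)               # days
flux = np.random.normal(1.0, 1e-3, time.size)     # synthetic light curve
clean_time, clean_flux = removeTransits(time, flux, 2.5, 1.0, 3.0)
# Samples within half a (3 h) transit duration of every predicted
# mid-transit time are dropped; everything else is returned unchanged.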
def get_obstList(self, X, Y, Z):
    """
    return a list of all indices of lattice points within the boundaries of
    the conical scour pit obstacle.  x_s is defined in 'Scour at marine
    structures' by Richard Whitehouse, 1998.  Assumes river sand with phi
    (angle of repose) equal to 30 degrees.  h_cone is equal to
    rad_cone*tan(30) = rad_cone*0.57735
    """
    x_c_cone = self.x_c
    z_c_cone = self.z_c
    y_c_cone = 0
    x_s = 2.25*2*self.cyl_rad
    rad_cone = x_s + self.cyl_rad
    h_cone = rad_cone*0.57735

    floor_part = np.array(np.where(Y < h_cone)).flatten()

    dist = (X - self.x_c)**2 + (Z - self.z_c)**2
    cyl_part = list(np.array(np.where(dist < self.cyl_rad**2)).flatten())

    # lattice points inside the conical pit satisfy
    #   (X - x_c)**2 + (Z - z_c)**2 <= (rad_cone/h_cone)**2 * (Y - y_c)**2
    # (the original expression referenced an undefined name `cone`;
    #  rad_cone/h_cone is assumed here to be the intended ratio)
    scour_pit = np.array(np.where(
        (X - x_c_cone)**2 + (Z - z_c_cone)**2 <=
        (rad_cone/h_cone)**2*(Y - y_c_cone)**2))

    # remove the scour pit from the floor
    obst_list = np.setxor1d(floor_part[:],
                            np.intersect1d(floor_part[:], scour_pit[:]))

    # then add the cylinder
    obst_list = np.union1d(obst_list[:], cyl_part[:])

    return list(obst_list[:])
def _arithmetical_operation(self, x, operator, in_place=False):
    operator, ioperator = {
        '+': (add, iadd),
        '-': (sub, isub),
        '*': (mul, imul),
        '/': (div, idiv),
        '**': (pow, ipow)
    }[operator]

    if in_place:
        if isinstance(x, self.__class__):
            with ndarray_write(self._domain), ndarray_write(self._range):
                self[self._domain] = operator(self._range, x[self._domain])

                exclusive_or = np.setxor1d(self._domain, x.domain)
                self[exclusive_or] = np.full(exclusive_or.shape, np.nan)
        else:
            with ndarray_write(self._range):
                self.range = ioperator(self.range, x)

        return self
    else:
        copy = ioperator(self.copy(), x)

        return copy
def sets():
    global a, b, c, z, r
    print '###########################'
    print '#'
    print '#  Set operations'
    print '#'
    print '###########################'

    # unique elements
    print 'unique elements:', np.unique([0, 0, 1, 1, 5, 6, 7, 7, 2, 11],
                                        return_index=True,
                                        return_inverse=True)

    # test whether each element of the first array is also present in the
    # second array; returns a boolean array
    d1 = np.array([0, 1, 2, 5, 0, 5])
    d2 = [0, 2, 5]
    print 'membership:', np.in1d(d1, d2)

    # intersection
    print 'intersection:', np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])

    # difference
    d1 = np.array([1, 2, 3, 2, 4, 1])
    d2 = np.array([3, 4, 5, 6])
    print 'set difference:', np.setdiff1d(d1, d2)

    # symmetric difference (xor)
    d1 = np.array([1, 2, 3, 2, 4])
    d2 = np.array([2, 3, 5, 7, 5])
    print 'symmetric difference:', np.setxor1d(d1, d2)  # was setxor1d(a, b); d1/d2 are the arrays defined above

    # union
    print 'union:', np.union1d([-1, 0, 1], [-2, 0, 2])
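# Hedged follow-up (not part of the original script): the value np.setxor1d
# returns for the two arrays used above -- the sorted unique elements that
# appear in exactly one of the inputs.
import numpy as np

d1 = np.array([1, 2, 3, 2, 4])
d2 = np.array([2, 3, 5, 7, 5])
assert np.array_equal(np.setxor1d(d1, d2), [1, 4, 5, 7])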
def sampleNo(xvar, yvar, N, avoididx):
    """Sample from pixels in mesh, excluding yes pixels and already sampled
    no pixels.

    :param xvar:
      Numpy array of centers of all columns in mesh.
    :param yvar:
      Numpy array of centers of all rows in mesh.
    :param N:
      Number of no pixels to sample.
    :param avoididx:
      1D array of indices from mesh that should NOT be sampled from.
      Initially this will be the array of indices where the yes pixels are.
    :returns:
      Randomly chosen list of tuples of (x,y) coordinate points that are
      outside polygons.
    """
    allidx = np.arange(0, len(xvar)*len(yvar))  # flattened array of all indices in mesh
    noidx = np.setxor1d(allidx, avoididx)  # allidx - avoididx
    # noidx = np.array(list(set(allidx) - set(avoididx)))
    nosampleidx = np.random.choice(noidx, size=N, replace=False)
    newavoididx = np.sort(np.hstack((avoididx, nosampleidx)))
    rowidx, colidx = np.unravel_index(nosampleidx, (len(yvar), len(xvar)))
    samples = []
    for row, col in zip(rowidx, colidx):
        xp = xvar[col]
        yp = yvar[row]
        samples.append((xp, yp))

    return (samples, newavoididx)
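# Hedged usage sketch for sampleNo (synthetic mesh; the "yes" indices and
# sample count are made up for illustration).
import numpy as np

xvar = np.linspace(0.0, 1.0, 10)           # 10 column centers
yvar = np.linspace(0.0, 1.0, 8)            # 8 row centers
yes_idx = np.array([0, 1, 2, 11, 12])      # flattened cells to avoid
samples, new_avoid = sampleNo(xvar, yvar, 5, yes_idx)
# samples: five (x, y) tuples drawn only from cells outside yes_idx;
# new_avoid: yes_idx plus the five newly sampled cell indices, sorted.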
def remove_from_file(data, filename):
    """trim out rows that match genes found in file <filename>
    First row is ignored
    if multiple entries per line, separated by comma, take the first"""

    # load gene names from file
    ff = open(filename)
    xx = ff.readline()  # discard first line

    hk_genes = list()
    for line in ff:
        entries = line.split(',')
        hk_genes.append(entries[0].strip().lower())

    ff.close()

    # match hk genes to gene indices
    missing = 0
    hk_indices = list()
    lower_row_labels = [gene.lower() for gene in data.row_labels]
    for hk_gene in hk_genes:
        try:
            ii = lower_row_labels.index(hk_gene)
            hk_indices.append(ii)
        except ValueError:
            missing += 1

    # remove rows that match
    all_indices = np.arange(data.shape[0])
    if len(hk_indices) != 0:
        keep_indices = np.setxor1d(all_indices, hk_indices)
    else:
        keep_indices = all_indices

    return data.subset_genes(keep_indices)
def main():
    rec = args.rec
    r = mdtraj.load(rec)
    lig = args.lig
    l = mdtraj.load(lig)
    d = args.dist
    temp = args.temp
    pref = rec.split('.')[0]

    # can't figure out a non-hacky way to combine pdbs.
    cp_receptor = 'head -n -2 ' + rec
    receptor, r_err = call_cl(cp_receptor)
    cp_ligand = 'tail -n +2 ' + lig
    ligand, l_err = call_cl(cp_ligand)
    tf = open(temp, 'w')
    tf.write(receptor)
    tf.write(ligand)
    tf.close()

    # get indices of receptor within distance d of ligand
    comb = mdtraj.load(temp)
    # get ligand indices
    li = comb.topology.select('not protein')
    # find neighbors
    neighbors = mdtraj.compute_neighbors(comb, d, li)[0]
    # remove ligand from neighbor list (symmetric diff)
    n = np.setxor1d(li, neighbors)

    # easier to reset sometimes
    comb = mdtraj.load(temp)
    comb.restrict_atoms(n)
    comb.save_pdb(pref + '_trim.pdb')
    IPython.embed()
def get_obstList(self, X, Y, Z):
    """
    return a list of all indices of lattice points within the boundaries of
    the scour pit obstacle
    """
    ellip_a = 2.*2.*self.cyl_rad
    ellip_b = 2.*self.cyl_rad
    ellip_c = 8.*self.cyl_rad
    ellip_x = self.x_c
    ellip_z = self.z_c + self.cyl_rad
    ellip_y = ellip_b

    floor_part = np.array(np.where(Y < ellip_b)).flatten()

    dist = (X - self.x_c)**2 + (Z - self.z_c)**2
    cyl_part = list(np.array(np.where(dist < self.cyl_rad**2)).flatten())

    scour_pit = np.array(np.where(
        (X - ellip_x)**2/(ellip_a**2) +
        (Y - ellip_y)**2/(ellip_b**2) +
        (Z - ellip_z)**2/(ellip_c**2) <= 1.)).flatten()

    # remove the scour pit from the floor
    obst_list = np.setxor1d(floor_part[:],
                            np.intersect1d(floor_part[:], scour_pit[:]))

    # then add the cylinder
    obst_list = np.union1d(obst_list[:], cyl_part[:])

    return list(obst_list[:])
def __setSelectionRect(self, rect, action):
    # Set the current mouse drag selection rectangle
    if not rect.isValid():
        rect = rect.adjusted(-0.01, -0.01, 0.01, 0.01)

    rect = rect.intersected(self.contentsRect())
    indices = self.__selectionIndices(rect)

    if action & SelectAction.Clear:
        selection = []
    elif self.__selstate is not None:
        # Mouse drag selection is in progress. Update only the current
        # selection
        selection = self.__selstate.selection
    else:
        selection = self.__selection

    if action & SelectAction.Toogle:
        selection = np.setxor1d(selection, indices)
    elif action & SelectAction.Deselect:
        selection = np.setdiff1d(selection, indices)
    elif action & SelectAction.Select:
        selection = np.union1d(selection, indices)

    self.setSelection(selection)
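# Hedged aside (not part of the original widget code): in the Toogle branch
# above, np.setxor1d acts as a toggle -- indices already in the selection are
# removed while newly dragged-over indices are added.
import numpy as np

selection = np.array([1, 2, 3])
indices = np.array([3, 4])
assert np.array_equal(np.setxor1d(selection, indices), [1, 2, 4])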
def remove_threes_and_fours(X, Y):
    """
    Y: array-like, shape (n_examples,)
    """
    three_idxs = np.where(Y == 3)
    four_idxs = np.where(Y == 4)
    ia = np.indices(Y.shape)
    remaining_idxs = np.setxor1d(ia, np.concatenate((three_idxs[0],
                                                     four_idxs[0])))
    return X[remaining_idxs], Y[remaining_idxs]
def get_obstList(self, X, Y, Z):
    """
    return a list of all indices of lattice points within the boundaries of
    the conical scour pit obstacle
    """
    x_c_cone = self.x_c
    z_c_cone = self.z_c
    y_c_cone = 0
    x_s = 2.25*2*self.cyl_rad
    rad_cone = x_s + self.cyl_rad
    h_cone = rad_cone*0.57735

    floor_part = np.array(np.where(Y < h_cone)).flatten()

    dist = (X - self.x_c)**2 + (Z - self.z_c)**2
    cyl_part = list(np.array(np.where(dist < self.cyl_rad**2)).flatten())

    # as in the variant above, the original referenced an undefined name
    # `cone`; rad_cone/h_cone is assumed to be the intended ratio
    scour_pit = np.array(np.where(
        (X - x_c_cone)**2 + (Z - z_c_cone)**2 <=
        (rad_cone/h_cone)**2*(Y - y_c_cone)**2))

    # remove the scour pit from the floor
    obst_list = np.setxor1d(floor_part[:],
                            np.intersect1d(floor_part[:], scour_pit[:]))

    # then add the cylinder
    obst_list = np.union1d(obst_list[:], cyl_part[:])

    return list(obst_list[:])
def PrepForTestTrain(dataSet, iterCnt, dirLoc, varNames, varIndxs):
    dataLen = dataSet.shape[0]
    trainLen = int(numpy.floor(dataLen * 0.6666))  # random.sample() needs an int

    for iterIndex in range(iterCnt):
        print "%d of %d iterations" % (iterIndex + 1, iterCnt)

        # Save a random sample of the data for training, and the rest for testing
        trainSelected = random.sample(range(dataLen), trainLen)
        testSelected = numpy.setxor1d(trainSelected, range(dataLen))

        for subProj in varIndxs.keys():
            arffHeader = dirLoc + '/' + subProj + '/arffHeader.txt'
            MakeARFFHeader(varNames[varIndxs[subProj]], arffHeader)

            trainStem = "%s/%s/trainingData_%dof%d" % (dirLoc, subProj, iterIndex + 1, iterCnt)
            testStem = "%s/%s/testingData_%dof%d" % (dirLoc, subProj, iterIndex + 1, iterCnt)

            numpy.savetxt(trainStem + '.csv', dataSet[trainSelected, varIndxs[subProj]],
                          fmt="%6.4f", delimiter=',')
            os.system('cat %s %s > %s' % (arffHeader, trainStem + '.csv', trainStem + '.arff'))

            numpy.savetxt(testStem + '.csv', dataSet[testSelected, varIndxs[subProj]],
                          fmt="%6.4f", delimiter=',')
            os.system('cat %s %s > %s' % (arffHeader, testStem + '.csv', testStem + '.arff'))

    return(fileNames)
def csi_compute(param):
    """worker function"""
    get_var_from_file(os.path.join(GRATOOLS_CONFIG, 'Csi_config.py'))
    th_bins = data.TH_BINNING
    i, veci, dI, R, nside = param
    if i % 10000 == 0:
        print i
    dIi = dI[i]
    Ri = R[i]
    dIij_list = [[] for l in range(0, len(th_bins)-1)]
    counts_list = [[] for l in range(0, len(th_bins)-1)]
    Rij_list = [[] for l in range(0, len(th_bins)-1)]
    for th, (thmin, thmax) in enumerate(zip(th_bins[:-1], th_bins[1:])):
        pixintorad_min = hp.query_disc(nside, veci, thmin)
        pixintorad_max = hp.query_disc(nside, veci, thmax)
        pixintoring = np.setxor1d(pixintorad_max, pixintorad_min)
        Rj = R[pixintoring]
        Rj = Rj[Rj > hp.UNSEEN]
        dIj = dI[pixintoring]
        dIj = dIj[dIj > hp.UNSEEN]
        dIij = np.sum(dIi*dIj)  # -Imean**2)
        Rij = np.sum(Ri*Rj)
        counts = len(dIj)
        dIij_list[th].append(dIij)
        counts_list[th].append(counts)
        Rij_list[th].append(Rij)
    return dIij_list, counts_list, Rij_list
def test_sample_game_payoff():
    profiles = [
        [0, 4, 0, 9],
        [0, 4, 1, 8],
        [0, 4, 4, 5],
        [0, 4, 3, 6],
    ]
    payoffs = [
        [
            [[0] * 4, [1, 2, 3, 4], [0] * 4, [5, 6, 7, 8]],
        ],
        [
            [[0, 0], [0, 0], [9, 10], [0, 0]],
        ],
        [
            [[0] * 3, [0] * 3, [0] * 3, [11, 12, 13]],
        ],
        [
            [[0] * 5, [14, 15, 16, 17, 18], [0] * 5, [0] * 5],
        ],
    ]
    game = rsgame.SampleGame([4, 9], 2, profiles, payoffs)
    red = reduction.DeviationPreserving([2, 2], [4, 9], [2, 3])
    red_game = red.reduce_game(game)

    prof_map = dict(zip(
        map(utils.hash_array, red_game.profiles),
        itertools.chain.from_iterable(red_game.sample_payoffs)))

    payoffs = prof_map[utils.hash_array([0, 2, 0, 3])]
    actual = payoffs[1]
    expected = [1, 2, 3, 4]
    assert np.setxor1d(actual, expected).size == 0
    actual = payoffs[3]
    expected = [5, 6, 7, 8]
    assert np.setxor1d(actual, expected).size == 0

    payoffs = prof_map[utils.hash_array([0, 2, 1, 2])]
    actual = payoffs[1]
    expected = [14, 15, 16, 17, 18]
    assert np.setxor1d(actual, expected).size == 3
    actual = payoffs[2]
    expected = [9, 10]
    assert np.setxor1d(actual, expected).size == 0
    actual = payoffs[3]
    expected = [11, 12, 13]
    assert np.setxor1d(actual, expected).size == 1
def get_obstList(self, X, Y, Z):
    # the body works with the lower-case copies, so the conversion must be active
    x = np.array(X); y = np.array(Y); z = np.array(Z)

    obst_l = np.where(z < self.L)
    obst_h = np.where(z > 0.2)
    obst = np.intersect1d(obst_l[:], obst_h[:])

    y_dist1 = np.abs(y - (self.y1 + self.a/2.))
    ch1 = np.where(y_dist1 < self.a/2.)
    ch1 = np.intersect1d(obst[:], ch1[:])
    obst = np.setxor1d(obst[:], ch1[:])

    y_dist2 = np.abs(y - (self.y1 + self.a/2. + self.S))
    ch2 = np.where(y_dist2 < self.a/2.)
    ch2 = np.intersect1d(obst[:], ch2[:])
    obst = np.setxor1d(obst[:], ch2[:])

    return obst[:]
def divide_and_limit(numerators, denominators, zero_limits):
    # make sure we have compatible arrays for the following computations
    if denominators.shape != numerators.shape:
        message = "numerator and denominator arrays do not share the same shape"
        logger.error(message)
        raise ValueError(message)

    # get the original shape so that later we can reshape the arrays
    # that we'll flatten back into their original shape
    original_shape = denominators.shape
    denominators = denominators.flatten()
    numerators = numerators.flatten()

    # create an array of values corresponding to the shape of the input arrays
    results = np.full(denominators.shape, np.NAN)

    # get a column vector of indices where denominator is not zero,
    # so as to avoid divide by zero in the following calculation
    not_zeros = np.where(denominators != 0)
    if len(not_zeros[0]) > 0:
        results[not_zeros] = numerators[not_zeros] / denominators[not_zeros]

    # get array of index values corresponding to the denominators array, for
    # example if the array is 4 elements long then we get an indices array: [0, 1, 2, 3]
    index_values = np.array(range(len(denominators)), np.int)

    # perform an XOR on the array indices and the 'not zeros' array of indices
    # to get the indices of the array where the value is zero
    zeros = np.setxor1d(index_values, not_zeros)
    if zeros.size > 0:
        # we have a zero denominator sum value so we can't perform the normal
        # calculation at these points, so we limit the value to the zero limits
        if len(zero_limits) == 1:
            results[zeros] = zero_limits[0]
        elif len(zero_limits) == 2:
            # find indices where the value is zero, set the value at these
            # indices to the first zero limit
            limits = np.where(numerators[zeros] == 0)
            if limits[0].size > 0:
                results[zeros[limits]] = zero_limits[0]

            # find indices where the value is not zero, set the value at these
            # indices to the second zero limit
            limits = np.where(numerators[zeros] != 0)
            if limits[0].size > 0:
                results[zeros[limits]] = zero_limits[1]
        else:
            message = "Invalid zero limits argument, must contain 1 or 2 values"
            logger.error(message)
            raise ValueError(message)

    # reshape the results back to our original shape and return
    return np.reshape(results, original_shape)
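# Hedged usage sketch for divide_and_limit (toy arrays; the zero_limits
# values are illustrative only).
import numpy as np

num = np.array([[2.0, 0.0], [3.0, 5.0]])
den = np.array([[4.0, 0.0], [0.0, 2.0]])
out = divide_and_limit(num, den, [0.0, 1.0])
# -> [[0.5, 0.0], [1.0, 2.5]]: ordinary division where den != 0, the first
#    limit where num and den are both zero, the second where only den is zero.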
def split_samples(data, subsample_size):
    np.random.seed(RANDOM_SEED)  # Set seed so outputs are repeatable
    sub_ii = np.random.choice(data.shape[1], subsample_size, replace=False)
    holdouts_ii = np.setxor1d(np.arange(data.shape[1]), sub_ii)

    subset = data.subset_samples(sub_ii)
    holdouts = data.subset_samples(holdouts_ii)

    return holdouts, subset
def find_N(O, j, E_c, E_s):
    N_cur = np.unique(E_s[j])
    N_prev = N_cur.copy()
    N_cur = expand(N_prev, E_c, E_s)
    while np.setxor1d(N_cur, N_prev, assume_unique=True).size > 0 and \
            not np.setdiff1d(O, N_cur, assume_unique=True).size == 0:
        N_cur, N_prev = expand(N_cur, E_c, E_s), N_cur
    res = np.setdiff1d(O, N_prev)
    return res
def partition(self, indices):
    '''Partition a Group according to a set of indices, possibly given some
    logical criterion e.g. using np.where, returning new objects (the set
    according to its indices and the complement
    indices := partition according to these'''
    C = copy.deepcopy(self)
    D = copy.deepcopy(self)
    D.atoms = D.atoms[indices]
    C.atoms = C.atoms[np.setxor1d(np.array(indices),
                                  np.arange(len(C.atoms)))]
    return D, C
def Jmattofolds(Jmat, kfolds, type='ifg'):
    '''Creates a list of length kfolds. Every entry of the list is a
    row index into the original Jmat.

    Args:
        * Jmat    -> connectivity matrix (Nifg x Nsar)
        * kfolds  -> Number of folds
        * type    -> Can be 'sar', 'ifg'

    Returns:
        * trainlist -> List of row indices for training set
        * testlist  -> List of row indices for testing set'''

    Nifg, Nsar = Jmat.shape

    flag = True
    while flag:
        trainlist = []
        testlist = []

        if type == 'sar':
            pos = np.random.permutation(Nsar)
        elif type == 'ifg':
            pos = np.random.permutation(Nifg)

        breakpos = pos.size/kfolds
        inds = np.arange(Nifg, dtype=np.int)

        if type == 'sar':
            trnbreak = np.arange(0, Nsar, breakpos).astype(np.int)
            if trnbreak[-1] != (Nsar-1):
                trnbreak = np.append(trnbreak, Nsar-1)
        elif type == 'ifg':
            trnbreak = np.arange(0, Nifg, breakpos).astype(np.int)
            if trnbreak[-1] != (Nifg-1):
                trnbreak = np.append(trnbreak, Nifg-1)

        for ii in range(kfolds):
            itrain = pos[trnbreak[ii]:trnbreak[ii+1]]
            if type == 'sar':
                temp = np.abs(Jmat[:, itrain]).sum(axis=1)
                itrain = np.flatnonzero(temp == 2)  # ndarray has no .flatnonzero() method
            itest = np.setxor1d(itrain, inds)
            trainlist.append(itrain)
            testlist.append(itest)

        for ii in range(kfolds):
            flag = flag & (len(trainlist[ii]) > 2)

        flag = not flag

    return trainlist, testlist
def read_html(filename, person):
    file_data = urllib.urlopen(filename).read()
    soup = BeautifulSoup(file_data, 'html.parser')
    date_to_msgs = {}
    relationships = {}
    for thread in soup.find_all('div', class_='thread'):
        # check if thread is b/w 2 people
        thread_iter = thread.childGenerator()
        people_involved = thread_iter.next()
        people_involved = people_involved.split(',')

        # get all the messages
        msgs = thread.find_all('p')
        msgs = [m.get_text().encode('ascii', 'ignore') for m in msgs]

        # holds the name and timestamp information
        msg_meta = thread.find_all('div', class_='message')

        # filter to only messages by our person of interest
        total_indices = np.arange(len(msg_meta))
        meta_filtered_idx = [idx for (idx, p) in enumerate(msg_meta)
                             if (person in p.span.get_text().encode('ascii', 'ignore'))]
        msgs_filtered = list(array(msgs)[meta_filtered_idx])

        # also the timestamps mapping to the msgs
        just_timestamps = [str(ts.find('span', class_='meta').get_text())
                           for ts in msg_meta if ts.find('span', class_='meta')]
        just_timestamps = [parse(t) for t in just_timestamps]
        timestamps_filtered = list(array(just_timestamps)[meta_filtered_idx])

        for idx, ts in enumerate(timestamps_filtered):
            if ts not in date_to_msgs:
                date_to_msgs[ts] = [msgs_filtered[idx]]
            else:
                date_to_msgs[ts].append(msgs_filtered[idx])

        if len(people_involved) == 2:
            other = people_involved[0] if person not in people_involved[0] else people_involved[1]

            # get the other person's info
            not_indices = np.setxor1d(total_indices, meta_filtered_idx)
            not_indices = [int(i) for i in not_indices]  # sometimes the xor returns floats
            if (len(not_indices) > 0 and isinstance(not_indices[0], int)):
                other_filtered = list(array(msgs)[not_indices])
                other_timestamps = list(array(just_timestamps)[not_indices])
                relationships[other] = ([[timestamps_filtered], [msgs_filtered]],
                                        [[other_timestamps], [other_filtered]])  # -person-, -other-

                ts_grouped = [list(g) for k, g in
                              itertools.groupby(timestamps_filtered, key=lambda d: d.date())]
                other_ts_grouped = [list(g) for k, g in
                                    itertools.groupby(other_timestamps, key=lambda d: d.date())]

                [senti, msgs_grouped] = group_msgs(msgs_filtered, ts_grouped)
                [other_senti, other_msgs_grouped] = group_msgs(other_filtered, other_ts_grouped)
                print senti
                print other_senti
def get_obstList(self, X, Y, Z):
    """
    return a list of indices within the boundary of the channel floor
    """
    # the body works with the lower-case copies, so the conversion must be active
    x = np.array(X); y = np.array(Y); z = np.array(Z)

    cav1 = np.where(z >= self.z_start)
    cav2 = np.where(z <= self.z_end)
    ol = np.setxor1d(cav1[:], cav2[:])

    cav3 = np.where(y <= self.depth)
    ol = np.intersect1d(ol[:], cav3[:])

    return ol[:]
def nd_diffxor(a, b, uni=False):
    """using setxor... it is slower than nd_diff, 36 microseconds vs 18.2
    but this is faster for large sets
    """
    a_view = _view_as_(a)
    b_view = _view_as_(b)
    good = _check_dtype_(a_view, b_view)  # check dtypes
    if not good:
        return None
    ab = np.setxor1d(a_view, b_view, assume_unique=uni)
    return ab.view(a.dtype).reshape(-1, ab.shape[0]).squeeze()
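# Hedged illustration of the idea behind nd_diffxor (standalone, not the
# module's private _view_as_/_check_dtype_ helpers): view each 2-D array as a
# 1-D structured array so np.setxor1d can compare whole rows at once.
import numpy as np

a = np.array([[0, 0], [0, 1], [1, 1]])
b = np.array([[0, 1], [2, 2]])
dt = np.dtype([('f{}'.format(i), a.dtype) for i in range(a.shape[1])])
xor_rows = np.setxor1d(np.ascontiguousarray(a).view(dt),
                       np.ascontiguousarray(b).view(dt))
# xor_rows holds the structured rows found in exactly one input:
# (0, 0), (1, 1) and (2, 2).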
def calc_indices(self, orig_shape):
    indices = self.parameters['indices']
    if isinstance(indices, list):
        self.indices = np.array(indices)
    else:
        indices_list = indices.split(':')
        indices_list = [int(i) for i in indices_list]
        if len(indices_list) == 2:  # compare by value, not identity
            indices_list.append(1)
        self.indices = np.arange(*indices_list)
    return np.setxor1d(np.arange(orig_shape), self.indices)
def test_elem_axis():
    x = np.array([[5.4, 2.2], [5.7, 2.8], [9.6, 1.2]], float)
    assert np.all(x == utils.elem_to_axis(utils.axis_to_elem(x), float))
    assert np.all(x.astype(int) ==
                  utils.elem_to_axis(utils.axis_to_elem(x.astype(int)), int))
    assert utils.unique_axis(x).shape == (3, 2)
    array, counts = utils.unique_axis(x.astype(int), return_counts=True)
    assert array.shape == (2, 2)
    assert not np.setxor1d(counts, [2, 1]).size
def main(): import torch from torch.optim import lr_scheduler import torch.optim as optim from torch.autograd import Variable from trainer import fit import numpy as np cuda = torch.cuda.is_available() # Training settings parser = argparse.ArgumentParser( description='cross subject domain adaptation') parser.add_argument('--batch-size', type=int, default=100, metavar='N', help='input batch size for training (default: 64)') parser.add_argument('--test-batch-size', type=int, default=100, metavar='N', help='input batch size for testing (default: 1000)') parser.add_argument('--epochs', type=int, default=100, metavar='N', help='number of epochs to train (default: 10)') parser.add_argument('--lr', type=float, default=0.001, metavar='LR', help='learning rate (default: 0.01)') parser.add_argument('--momentum', type=float, default=0.5, metavar='M', help='SGD momentum (default: 0.5)') parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') parser.add_argument( '--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') parser.add_argument('--save-model', action='store_true', default=True, help='For Saving the current Model') # Writer will output to ./runs/ directory by default fold_idx = 4 gamma = 0.7 margin = 1.0 DAsetting = False args = parser.parse_args() args.seed = 0 args.use_tensorboard = True args.save_model = True n_epochs = 200 startepoch = 0 folder_name = 'exp11_0630' comment = 'deep4' + str(fold_idx) + '_g_' + str(gamma) + '_m_' + str( margin) use_cuda = not args.no_cuda and torch.cuda.is_available() torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) torch.cuda.manual_seed(args.seed) np.random.seed(args.seed) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = True device = torch.device("cuda" if use_cuda else "cpu") #kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} from datetime import datetime import os loging = False x_data, y_data = load_smt() x_data = x_data[:, :, :, 100:] #get subject number y_subj = np.zeros([108, 200]) for i in range(108): y_subj[i, :] = i * 2 y_subj = y_data.reshape(108, 200) + y_subj y_subj = y_subj.reshape(21600) #y_subj = np.concatenate([y_data,y_subj],axis=1) # plt.imshow(x_data[100,0,:,:]) # For classification data valtype = 'subj' # if x_data.shape[2] != 60: # x_data = x_data[:,:,2:,:] # plt.imshow(x_data[1000,0,:,:]) # #subj - 0-27 train # train_subj1 = np.r_[0:27] # train_subj2 = np.r_[0:27]+54 # # test_subj = np.r_[27:54,54+27:108] #chidx = np.r_[7:11, 12:15, 17:21, 32:41] #오연조건 # chidx = np.r_[2:56, 60:62] # x_data = x_data[:,:,chidx,:] # For Domain adaptation setting if DAsetting: # test_subj = np.r_[fold_idx * 9:fold_idx * 9 + 9, fold_idx * 9 + 54:fold_idx * 9 + 9 + 54] test_subj_id = 39 test_subj = np.r_[test_subj_id:test_subj_id + 1] train_subj1 = np.setxor1d(np.r_[0:108], test_subj) train_subj2 = test_subj n_targets = 60 trial_s = (0, 200) trial_t = (0, n_targets) trial_val = (n_targets, 200) # dataset_train1 = GigaDataset(x=x_data, y=y_data, valtype=valtype, istrain=True,subj=train_subj1,trial=trial_s) dataset_train = GigaDataset(x=x_data, y=y_data, valtype=valtype, istrain=True, subj=train_subj2, trial=trial_t) # dataset_train = dataset_train1.__add__(dataset_train2) dataset_test = GigaDataset(x=x_data, y=y_data, valtype=valtype, istrain=False, subj=test_subj, trial=trial_val) triplet_dataset_train = TripletGigaDA(x=x_data, y=y_subj, valtype=valtype, istrain=True, subj_s=train_subj1, 
trial_s=trial_s, subj_t=train_subj2, trial_t=trial_t) # triplet_dataset_train2 = TripletGiga2(x=x_data, y=y_subj, valtype=valtype, istrain=True, subj=train_subj2, trial=trial_t) # triplet_dataset_train = triplet_dataset_train1.__add__(triplet_dataset_train2) triplet_dataset_test = TripletGigaDA(x=x_data, y=y_subj, valtype=valtype, istrain=True, subj_s=train_subj1, trial_s=trial_s, subj_t=test_subj, trial_t=trial_val) else: #DG setting # test_subj = np.r_[fold_idx*9:fold_idx*9+9,fold_idx*9+54:fold_idx*9+9+54] # train_subj = test_subj # trial_train = (0, 30) # trial_val = (30, 200) # # bci_excellent = np.r_[43, 20, 27, 1, 28, 32, 35, 44, 36, 2] # bci_excellent = np.concatenate([bci_excellent, bci_excellent + 54]) test_subj = np.r_[fold_idx * 9:fold_idx * 9 + 9, fold_idx * 9 + 54:fold_idx * 9 + 9 + 54] # train_subj = np.setdiff1d(bci_excellent, test_subj) # bci_excellent.sort() print('test subj:' + str(test_subj)) train_subj = np.setdiff1d(np.r_[0:108], test_subj) trial_train = (0, 200) trial_val = (0, 200) dataset_train = GigaDataset(x=x_data, y=y_data, valtype=valtype, istrain=True, subj=train_subj, trial=trial_train) dataset_test = GigaDataset(x=x_data, y=y_data, valtype=valtype, istrain=False, subj=test_subj, trial=trial_val) triplet_dataset_train = TripletGiga2(x=x_data, y=y_subj, valtype=valtype, istrain=True, subj=train_subj, trial=trial_train) # triplet_dataset_train2 = TripletGiga2(x=x_data[:,:,:,10:], y=y_subj, valtype=valtype, istrain=True, subj=train_subj, # trial=trial_train) # triplet_dataset_train = triplet_dataset_train1.__add__(triplet_dataset_train2) triplet_dataset_test = TripletGiga2(x=x_data, y=y_subj, valtype=valtype, istrain=False, subj=test_subj, trial=trial_val) train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=args.batch_size, shuffle=True) test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=args.batch_size, shuffle=False) triplet_train_loader = torch.utils.data.DataLoader( triplet_dataset_train, batch_size=args.batch_size, shuffle=True) triplet_test_loader = torch.utils.data.DataLoader( triplet_dataset_test, batch_size=args.batch_size, shuffle=False) ################################################################################################################### # make model for metric learning from networks import DWConvNet, basenet, Deep4Net_origin, Deep4Net, Deep4NetWs, EmbeddingDeep4CNN, EmbeddingDeep4CNN_bn, TripletNet, FineShallowCNN, EmbeddingDeepCNN, QuintupletNet, EmbeddingShallowCNN from losses import TripletLoss_dev2, TripLoss, ContrastiveLoss_dk if gamma == 1.0: model = Deep4Net_origin() else: embedding_net = Deep4Net() print(embedding_net) model = TripletNet(embedding_net) #exp3-1 fc레이어 한층더 # model.fc = nn.Sequential( # nn.Linear(model.num_hidden,128), # nn.ReLU(), # nn.Dropout(), # nn.Linear(128,2) # ) if cuda: model.cuda() loss_fn = TripletLoss_dev2(margin, gamma).cuda() log_interval = 10 ########################################################## # optimizer = optim.Adam(model.parameters()) optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005) milestones = [15, 30, 50, 120] scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1) # 너무 빨리 떨구면 언더피팅하는듯 # exp1 : 62ch 0~5fold까지 셋팅 # optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) # scheduler = lr_scheduler.StepLR(optimizer, 5, gamma=0.5, last_epoch=-1) #exp2 : 운동영역주변 20ch, train성능이 fit하지 않는 현상이 g=0.7,1.0 양족에서 모두 나타나서, 기존의 러닝레이트보다 강하게 줘보고 실험코자함 # optimizer = optim.SGD(model.parameters(), lr=0.01, 
momentum=0.9) # scheduler = lr_scheduler.StepLR(optimizer, 5, gamma=1.0, last_epoch=-1) # # # #exp4, exp5 # optimizer = optim.SGD(model.parameters(), lr=0.005/gamma, momentum=0.9) # scheduler = lr_scheduler.StepLR(optimizer, 10, gamma=0.8, last_epoch=-1) #너무 빨리 떨구면 언더피팅하는듯 # optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) # scheduler = lr_scheduler.StepLR(optimizer, 5, gamma=0.8, last_epoch=-1) #너무 빨리 떨구면 언더피팅하는듯 # exp5 # optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) # scheduler = lr_scheduler.StepLR(optimizer, 10, gamma=0.5, last_epoch=-1) # exp7 # optimizer = optim.SGD(model.parameters(), lr=0.005 / gamma, momentum=0.9) # scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[100,200], gamma=0.7) # 너무 빨리 떨구면 언더피팅하는듯 #model for validation evalmodel = nn.Sequential(model.embedding_net, model.fc, nn.LogSoftmax(dim=1)).to(device) print('____________DANet____________') print(model) #save someting model_save_path = 'model/' + folder_name + '/' + comment + '/' if (args.save_model): if not os.path.isdir(model_save_path): os.makedirs(model_save_path) if loging: fname = model_save_path + datetime.today().strftime( "%m_%d_%H_%M") + ".txt" f = open(fname, 'w') if args.use_tensorboard: writer = SummaryWriter(comment=comment) writer.add_text('optimizer', str(optimizer)) writer.add_text('scheduler', str(milestones)) writer.add_text('model_save_path', model_save_path) writer.add_text('model', str(model)) writer.close() # load_model_path = 'C:\\Users\dk\PycharmProjects\giga_cnn\model\deep100_negsubj\\fold_0_g_0.7\danet_0.7_49.pt' #'C:\\Users\dk\PycharmProjects\giga_cnn\구모델\\clf_83_8.pt'#'clf_29.pt' #'triplet_mg26.pt'#'clf_triplet2_5.pt' #'triplet_31.pt' # load_model_path = 'C:\\Users\dk\PycharmProjects\giga_cnn\model\exp6_basenet\\fold_0_g_0.6\danet_0.6_86.pt' if startepoch > 0: load_model_path = model_save_path + 'danet_' + str(gamma) + '_' + str( startepoch) + '.pt' model_save_path = model_save_path + '(cont)' else: load_model_path = None if load_model_path is not None: model.load_state_dict(torch.load(load_model_path)) # for param in model.embedding_net.parameters(): # param.requires_grad = False epochidx = 1 for epochidx in range(100): fit(triplet_train_loader, triplet_test_loader, model, loss_fn, optimizer, scheduler, epochidx, n_epochs, cuda, log_interval) print(epochidx) train_loss, train_score = eval(args, evalmodel, device, train_loader) eval_loss, eval_score = eval(args, evalmodel, device, test_loader) if args.use_tensorboard: writer.add_scalar('Train/Loss', np.mean(train_loss) / args.batch_size, epochidx) writer.add_scalar('Train/Acc', np.mean(train_score) / args.batch_size, epochidx) writer.add_scalar('Eval/Loss', np.mean(eval_loss) / args.batch_size, epochidx) writer.add_scalar('Eval/Acc', np.mean(eval_score) / args.batch_size, epochidx) writer.close() if args.save_model: torch.save( model.state_dict(), model_save_path + 'danet_' + str(gamma) + '_' + str(epochidx) + '.pt')
def load_mask(self, img, index): imgh, imgw = img.shape[0:2] mask_type = self.mask # external + random block if mask_type == 4: mask_type = 1 if np.random.binomial(1, 0.5) == 1 else 3 # external + random block + half elif mask_type == 5: mask_type = np.random.randint(1, 4) # random block if mask_type == 1: mask = create_mask(imgw, imgh, imgw // 2, imgh // 2) return mask if mask_type == 8: # print(imgw, imgh) # x = random.randint(imgw//4, imgw) # y = random.randint(imgh//4, imgh) # mask = create_mask(imgw, imgh, x, y) # if np.random.binomial(1, 0.1) > 0: # mask = np.ones_like(mask) mask = np.ones([imgw, imgh]) mask = (mask * 255).astype(np.uint8) return mask, mask # half if mask_type == 2: # randomly choose right or left return create_mask(imgw, imgh, imgw // 2, imgh, 0 if random.random() < 0.5 else imgw // 2, 0) # external if mask_type == 3: mask_index = random.randint(0, len(self.mask_data) - 1) mask = imread(self.mask_data[mask_index]) mask = self.resize(mask, imgh, imgw) mask = (mask > 0).astype( np.uint8) * 255 # threshold due to interpolation return mask # test mode: load mask non random if mask_type == 6: mask = imread(self.mask_data[index]) mask = self.resize(mask, imgh, imgw, centerCrop=False) mask = rgb2gray(mask) mask = (mask > 0).astype(np.uint8) * 255 return mask if mask_type == 7: bbox = np.array(self.data[index]['word_bb']) max_pad = np.max([imgh, imgw]) if self._mask_pad == -1: # coefficient = 1 # pad = coefficient*self._count//self._mask_pad_update_step # if pad > np.max(self.input_size+coefficient): # pad = np.random.randint(0, np.max(self.input_size), 1)[0] # elif pad == 0: # pad = 0 # else: # pad = np.random.randint(0, pad) if np.random.binomial(1, 0.1) > 0: pad = max_pad else: pad = np.random.randint(self._mask_safe_pad, np.ceil(max_pad / 2)) elif self._mask_pad == -2: # pad = np.random.randint(2, self._mask_pad, 1)[0] if self.data[index]['word_percent'] < 5: pad = 20 elif self.data[index]['word_percent'] < 10: pad = 15 elif self.data[index]['word_percent'] < 15: pad = 10 else: pad = 5 else: pad = self._mask_pad if not self.training: return mask_generation_with_BB([imgh, imgw], bbox, pad), \ mask_generation_with_BB([imgh, imgw], bbox, self._mask_safe_pad) # return np.ones([imgh, imgw]), mask_generation_with_BB([imgh, imgw], bbox, self._mask_safe_pad) nb_instance = bbox.shape[-1] # index_selected = np.random.permutation(nb_instance)[:np.random.choice(nb_instance-1)+1] index_selected = np.random.permutation(nb_instance)[:nb_instance - nb_instance // 5] index_all = np.array(range(nb_instance)) index_not_selected = np.setxor1d(index_selected, index_all) #print(len(index_selected), len(index_not_selected)) BB_not_selected = bbox[..., index_not_selected] BB2_selected = bbox[..., index_selected] mask_not_selected = mask_generation_with_BB([imgh, imgw], BB_not_selected, self._mask_safe_pad) mask_selected = mask_generation_with_BB([imgh, imgw], BB2_selected, self._mask_safe_pad) mask_safe_bbox = np.multiply(mask_selected, 1 - mask_not_selected) if pad >= max_pad or np.sum(mask_safe_bbox) == 0: return np.ones([imgh, imgw]), mask_generation_with_BB( [imgh, imgw], bbox, self._mask_safe_pad) else: mask_selected = mask_generation_with_BB([imgh, imgw], BB2_selected, pad) masks_pad = np.multiply(mask_selected, 1 - mask_not_selected) return masks_pad, mask_safe_bbox
print('Time = ', time.time() - t)
#CHS_gather[ki][di] = CHS
silh_gather[ki][di] = silh_avg

# Distribution of cluster sizes.
ClSz, _ = np.histogram(kmLabels, k)
ClSz_var[ki][di] = ClSz.var()

# Show country names that belong to each cluster
xx = []
for i in range(k):
    print('Cluster #', i)
    inds = np.where(kmLabels == i)
    inds = inds[0]
    ninds = np.setxor1d(inds, range(num_countries))
    #print(countriesLL.name[inds])
    tradesWithinCluster = trade_ntwrk[np.ix_(inds, inds)]
    tradesLeavingCluster = trade_ntwrk[np.ix_(inds, ninds)]
    tradesEnteringCluster = trade_ntwrk[np.ix_(ninds, inds)]
    tradesOutsideCluster = trade_ntwrk[np.ix_(ninds, ninds)]
    xx = [
        tradesWithinCluster[np.nonzero(tradesWithinCluster)].mean(),
        tradesLeavingCluster[np.nonzero(tradesLeavingCluster)].mean(),
        tradesEnteringCluster[np.nonzero(tradesEnteringCluster)].mean(),
        tradesOutsideCluster[np.nonzero(tradesOutsideCluster)].mean(),
def computeTravelTimes(self, slowness, calcOthers=False):
    """Compute the travel times and fill data and time matrix
    for later use of response and Jacobian, respectively.
    For response only active sources are needed, for Jacobian all."""
    mesh = self.mesh()
    nNodes = mesh.nodeCount()
    midPoints = self.mesh().cellCenters()
    param_markers = np.unique(mesh.cellMarkers())
    param_count = len(param_markers)
    data = self.data()
    if len(slowness) == mesh.cellCount():
        mesh.setCellAttributes(slowness)
        # self.mapModel(slowness)
    elif len(slowness) == param_count:
        # map the regions in the mesh to slowness
        slow_map = pg.stdMapF_F()
        min_reg_num = min(param_markers)
        for i, si in enumerate(slowness):
            slow_map.insert(float(i + min_reg_num), si)
        mesh.mapCellAttributes(slow_map)
    else:
        raise ValueError("Wrong no of parameters. Mesh size: {}, no "
                         "of regions: {}, and number of slowness values:"
                         "{}".format(mesh.cellCount(), param_count,
                                     len(slowness)))

    times = pg.RVector(nNodes, 0.)
    upTags = np.zeros(nNodes)
    downTags = np.zeros(nNodes)
    sourceIndices = np.unique(data("s"))
    if calcOthers:
        ns = len(sourceIndices)
        geophoneIndices = np.setxor1d(np.arange(data.sensorCount()),
                                      sourceIndices)
        sourceIndices = geophoneIndices
        # geophoneIndices = np.unique(data("g"))
        if self.debug:
            print("{:d}-{:d}={:d}".format(data.sensorCount(), ns,
                                          len(sourceIndices)))
    # if self.debug:  # resize not working
    #     self.solution().resize(self.mesh().nodeCount(), self.nSensors)
    #     print(self.solution().rows(), self.solution().cols())

    for iSource in np.array(sourceIndices, dtype=int):
        if self.debug:
            print(iSource)
        # initial condition (reset vectors)
        times *= 0.0
        upTags *= 0
        downTags *= 0
        downwind = set()
        source = data.sensorPosition(int(iSource))
        cell = mesh.findCell(source)
        # fill in nodes around source using local smoothness
        for i, n in enumerate(cell.nodes()):
            times[n.id()] = cell.attribute() * n.pos().distance(source)
            upTags[n.id()] = 1
        for i, n in enumerate(cell.nodes()):
            tmpNodes = pg.commonNodes(n.cellSet())
            for nn in tmpNodes:
                if not upTags[nn.id()] and not downTags[nn.id()]:
                    downwind.add(nn)
                    downTags[nn.id()] = 1

        while len(downwind) > 0:  # start fast marching
            fastMarch(mesh, downwind, times, upTags, downTags)

        self.dataMatrix[iSource] = pg.interpolate(
            mesh, times, destPos=data.sensorPositions())
        self.timeMatrix[iSource] = pg.interpolate(
            mesh, times, destPos=midPoints)

        if self.debug:
            print(self.solution().rows(), self.solution().cols())
            print(len(times), self.mesh())
            self.solution()[int(iSource)] = times
            self.solution().setCol(int(iSource), times)
def generate_clusters(self, density_factor, ligand_file, clustercenter_file): """Generate hydration sites from water molecules found in the binding site during the simulation. Clustering is done in two steps; i). An initial clustering over a 10% of frames, and ii). A refinement step where all frames are used. Parameters ---------- ligand_file : string Name of the PDB file containing atomic coordinates of the ligand, assumed to be co-crystallized with the protein. Returns ------- final_cluster_coords : numpy.ndarray Coordinates of hydration sites, represented by a 2-D array with shape N x 3, where N is the number of hydration sites identified during clustering. site_waters : list List of N sub-lists where N is the number of identified hydration sites, each sublist consist of a 3-element tuple for every water identified in that site. First element of the tuple is frame number, second element is correct index of the oxygen atom in the the original topology and third element is the offset index as read from a version of a trimmed version trajectory for clustering. Notes ----- The following attributes of the object are updated when the clustering is successfully completed. self.hsa_region_O_ids: The indices of water oxygen atoms in HSA region for each frame are stored in the corresponding lists. self.hsa_region_flat_ids: Same as above except that indices are not atom indices from the topology but in a sequence from 0 to N, where N is the total number of water oxygen atoms found in the HSA region throughout the simulation. self.hsa_region_water_coords: An N x 3 numpy array is initialized, where N is the total number of water water oxygen atoms found in the HSA region throughout the simulation. The array gets populated during individual frame processing. """ sphere_radius = md.utils.in_units_of(1.0, "angstroms", "nanometers") topology = md.load_topology(self.topology_file) if self.non_water_atom_ids.shape[0] == 0: raise Exception( ValueError, "Clustering is supported only for solute-solvent systems, no solute atoms found." ) ligand = md.load_pdb(ligand_file, no_boxchk=True) ligand_coords = ligand.xyz[0, :, :] binding_site_atom_indices = np.asarray( list(range(ligand_coords.shape[0]))) init_cluster_coords = None # Step 1: Initial Clustering if user didn't provide cluster centers if clustercenter_file is None: clustering_stride = 10 print("Reading trajectory for clustering.") with md.open(self.trajectory) as f: f.seek(self.start_frame) # read all frames if no frames specified by user if self.num_frames is None: trj_short = f.read_as_traj( topology, atom_indices=np.concatenate( (binding_site_atom_indices, self.wat_oxygen_atom_ids )))[self.start_frame::clustering_stride] else: trj_short = f.read_as_traj( topology, atom_indices=np.concatenate((binding_site_atom_indices, self.wat_oxygen_atom_ids)) )[self.start_frame:self.num_frames:clustering_stride] print(trj_short.n_frames) if trj_short.n_frames < 10: sys.exit( "Clustering requires at least 100 frames, current trajectory contains {0:d} frames." .format(trj_short.n_frames)) print("Performing an initial clustering over {0:d} frames.". format(trj_short.n_frames)) # Obtain water molecules solvating the binding site # FIXME: This is a workaround to use MDTraj compute_neighbor function xyz coordinates of the trajectory are # modified such that first n atoms coordinates are switched to n atoms of ligand coordinates. # Unexpected things will happen if the number of solute atoms less than the number of ligand atoms, which is # highly unlikely. 
coords = trj_short.xyz for i_frame in range(trj_short.n_frames): for pseudo_index in range( binding_site_atom_indices.shape[0]): coords[i_frame, pseudo_index, :] = ligand_coords[ pseudo_index, :] haystack = np.setdiff1d(trj_short.topology.select("all"), binding_site_atom_indices) binding_site_waters = md.compute_neighbors( trj_short, self.hsa_region_radius, binding_site_atom_indices, haystack_indices=haystack) # generate a list of tuples, each tuple is a water and corresponding frame number in trj_short water_id_frame_list = [(i, nbr) for i in range(len(binding_site_waters)) for nbr in binding_site_waters[i]] # Start initial clustering by building a KDTree and get initial neighbor count for all waters water_coordinates = np.ma.array( [coords[wat[0], wat[1], :] for wat in water_id_frame_list], mask=False) tree = spatial.cKDTree(water_coordinates) nbr_list = tree.query_ball_point(water_coordinates, sphere_radius) nbr_count_list = np.ma.array([len(nbrs) for nbrs in nbr_list], mask=False) cutoff = trj_short.n_frames * density_factor * 0.1401 if np.ceil(cutoff) - cutoff <= 0.5: cutoff = np.ceil(cutoff) else: cutoff = np.floor(cutoff) n_wat = 3 * cutoff # Set up clustering loop cluster_list = [] cluster_iter = 0 while n_wat > cutoff: # Get water with max nbrs and retrieve its neighbors and marked for exclusion in next iteration max_index = np.argmax(nbr_count_list) to_exclude = np.array(nbr_list[max_index]) # Set current water count to current neighbors plus one for the water itself n_wat = len(to_exclude) + 1 # Mask current water, its neighbors so that they are not considered in the next iteration nbr_count_list.mask[to_exclude] = True nbr_count_list.mask[max_index] = True # Mask current waters' and its neighbors' coords so that they are not considered in the next iteration water_coordinates.mask[to_exclude] = True water_coordinates.mask[max_index] = True # Accumulate neighbors for each water in current cluster, removing common neighbors nbrs_of_to_exclude = np.unique( np.array([ n_excluded for excluded_nbrs in nbr_list[to_exclude] for n_excluded in excluded_nbrs ])) # Obtain the list of waters whose neighbors need to be updated due to exclusion of the waters above to_update = np.setxor1d(to_exclude, nbrs_of_to_exclude) to_update = np.setdiff1d(to_update, np.asarray(max_index)) # Update the neighbor count for each water from the list generated above if to_update.shape[0] != 0: tree = spatial.cKDTree(water_coordinates) updated_nbr_list = tree.query_ball_point( water_coordinates[to_update], sphere_radius) # for each updated member, get its original index and update the original neighbor search list for index, nbrs in enumerate(updated_nbr_list): if not nbr_count_list.mask[to_update[index]]: nbr_count_list[to_update[index]] = len(nbrs) # Check distances with previously identified clusters and do not consider if within 1.2 A # of an existing cluster current_wat = water_id_frame_list[max_index] current_wat_coords = md.utils.in_units_of( coords[current_wat[0], current_wat[1], :], "nanometers", "angstroms") near_flag = 0 if len(cluster_list) != 0: for clust in cluster_list: clust_coords = coords[clust[0], clust[1], :] dist = np.linalg.norm(current_wat_coords - clust_coords) if dist < 1.20: near_flag += 1 if near_flag == 0: cluster_iter += 1 cluster_list.append(water_id_frame_list[max_index]) init_cluster_coords = [ coords[cluster[0], cluster[1], :] for cluster in cluster_list ] else: clusters_pdb_file = md.load_pdb(clustercenter_file, no_boxchk=True) init_cluster_coords = clusters_pdb_file.xyz[0, :, 
:] # Read full trajectory print("Reading trajectory to obtain water molecules for each cluster.") with md.open(self.trajectory) as f: f.seek(self.start_frame) if self.num_frames is None: trj = f.read_as_traj(topology, stride=1, atom_indices=np.concatenate( (binding_site_atom_indices, self.wat_oxygen_atom_ids))) self.num_frames = trj.n_frames else: trj = f.read_as_traj(topology, n_frames=self.num_frames, stride=1, atom_indices=np.concatenate( (binding_site_atom_indices, self.wat_oxygen_atom_ids))) if trj.n_frames < self.num_frames: print(( "Warning: {0:d} frames found in the trajectory, resetting self.num_frames." .format(trj.n_frames))) self.num_frames = trj.n_frames for i_frame in range(trj.n_frames): for pseudo_index in range(binding_site_atom_indices.shape[0]): trj.xyz[i_frame, pseudo_index, :] = ligand_coords[pseudo_index, :] haystack = np.setdiff1d(trj.topology.select("all"), binding_site_atom_indices) start_point = haystack[0] binding_site_waters = md.compute_neighbors( trj, self.hsa_region_radius, binding_site_atom_indices, haystack_indices=haystack) # From the full frame-wise set of waters in the binding site, build two more frame-wise lists # one where each frame has a correct index of waters and another with a new index which ranges from # 0 to M, where M is the total number of hsa region waters - 1 start = 0 for i in range(len(binding_site_waters)): self.hsa_region_O_ids.append([]) self.hsa_region_flat_ids.append([]) for wat in binding_site_waters[i]: wat_0 = wat - start_point wat_offset = ( wat_0 * self.water_sites) + self.wat_oxygen_atom_ids[0] self.hsa_region_O_ids[i].append(wat_offset) self.hsa_region_flat_ids[i].append(start) start += 3 water_id_frame_list = [(i, nbr) for i in range(len(binding_site_waters)) for nbr in binding_site_waters[i]] water_coordinates = np.array( [trj.xyz[wat[0], wat[1], :] for wat in water_id_frame_list]) # Initialize array that stores coordinates all water molecules in HSA region, used for entropy calcs self.hsa_region_water_coords = np.zeros( (len(water_id_frame_list) * 3, 3), dtype=float) tree = spatial.cKDTree(water_coordinates) nbr_list = tree.query_ball_point(init_cluster_coords, sphere_radius) final_cluster_coords = [] cutoff = int(self.num_frames * density_factor * 0.1401) if np.ceil(cutoff) - cutoff <= 0.5: cutoff = np.ceil(cutoff) else: cutoff = np.floor(cutoff) # apply refinement if user defined clusters not provided if clustercenter_file is None: # Step 2: Refinement # Initialize variables and data structures # Read in the trajectory but only first N solute atoms where N equals the number of ligand atoms # plus all water oxygen atoms # WARNING: This shifts indices of waters and once they are assigned to clusters, the indices need to # be corrected. print(( "Refining initial cluster positions by considering {0:d} frames." 
.format(self.num_frames))) # For each cluster, set cluster center equal to geometric center of all waters in the cluster site_waters = [] cluster_index = 1 for cluster in nbr_list: cluster_water_coords = water_coordinates[cluster] if len(cluster) > cutoff: near_flag = 0 waters_offset = [ (water_id_frame_list[wat][0] + self.start_frame, ((water_id_frame_list[wat][1] - start_point) * self.water_sites) + self.wat_oxygen_atom_ids[0]) for wat in cluster ] com = np.zeros(3) masses = np.ones(cluster_water_coords.shape[0]) masses /= masses.sum() com[:] = water_coordinates[cluster].T.dot(masses) cluster_center = com[:] # Raise flag if the current cluster center is within 1.2 A of existing cluster center for other, coord in enumerate(final_cluster_coords[:-1]): dist = np.linalg.norm( md.utils.in_units_of(cluster_center, "nanometers", "angstroms") - coord) if dist < 1.20: near_flag += 1 # Only add cluster center if it is at a safe distance from others if near_flag == 0: final_cluster_coords.append( md.utils.in_units_of(cluster_center, "nanometers", "angstroms")) site_waters.append(waters_offset) cluster_index += 1 # otherwise store data for each user defined cluster else: # For each cluster, set cluster center equal to geometric center of all waters in the cluster final_cluster_coords = md.utils.in_units_of( init_cluster_coords, "nanometers", "angstroms") site_waters = [] cluster_index = 1 for cluster in nbr_list: waters_offset = [ (water_id_frame_list[wat][0] + self.start_frame, ((water_id_frame_list[wat][1] - start_point) * self.water_sites) + self.wat_oxygen_atom_ids[0]) for wat in cluster ] site_waters.append(waters_offset) cluster_index += 1 # Write clustercenter file write_watpdb_from_coords("clustercenterfile", final_cluster_coords) self.clustercenter_file = "clustercenterfile.pdb" print(("Final number of clusters: {0:d}".format( len(final_cluster_coords)))) return np.asarray(final_cluster_coords), site_waters
def _train__(self):
    # Init pop and calculate fitness
    pop = [self._create_solution__(minmax=0) for _ in range(self.pop_size)]

    # Find the pathfinder
    pop = sorted(pop, key=lambda temp: temp[self.ID_FIT])
    g_best = deepcopy(pop[0])
    gbest_present = deepcopy(g_best)

    for i in range(self.epoch):
        alpha, beta = np.random.uniform(1, 2, 2)
        A = np.random.uniform(self.domain_range[0], self.domain_range[1]) * \
            np.exp(-2 * (i + 1) / self.epoch)

        ## Update the position of pathfinder and check the bound
        temp = gbest_present[self.ID_POS] + 2 * np.random.uniform() * \
            (gbest_present[self.ID_POS] - g_best[self.ID_POS]) + A
        temp = self._amend_solution_and_return__(temp)
        fit = self._fitness_model__(temp)
        g_best = deepcopy(gbest_present)
        if fit < gbest_present[self.ID_FIT]:
            gbest_present = [temp, fit]
        pop[0] = deepcopy(gbest_present)

        ## Update positions of members, check the bound and calculate new fitness
        for j in range(1, self.pop_size):
            temp1 = deepcopy(pop[j][self.ID_POS])
            t1 = beta * np.random.uniform() * (gbest_present[self.ID_POS] - temp1)

            my_list_idx = np.setxor1d(np.array(range(1, self.pop_size)), np.array([j]))
            idx = np.random.choice(my_list_idx)
            dist = np.linalg.norm(pop[idx][self.ID_POS] - temp1)
            t2 = alpha * np.random.uniform() * (pop[idx][self.ID_POS] - temp1)
            t3 = np.random.uniform(self.domain_range[0], self.domain_range[1], self.problem_size) * \
                (1 - (i + 1) * 1.0 / self.epoch) * dist
            temp1 += t1 + t2 + t3

            ## Update members
            temp1 = self._amend_solution_and_return__(temp1)
            fit = self._fitness_model__(temp1)
            if fit < pop[j][self.ID_FIT]:
                pop[j] = [temp1, fit]

        ## Update the best solution found so far (current pathfinder)
        pop = sorted(pop, key=lambda temp: temp[self.ID_FIT])
        current_best = deepcopy(pop[self.ID_MIN_PROBLEM])
        if current_best[self.ID_FIT] < gbest_present[self.ID_FIT]:
            gbest_present = deepcopy(current_best)

        self.loss_train.append(gbest_present[self.ID_FIT])
        if self.print_train:
            print("Generation : {0}, best result so far: {1}".format(
                i + 1, gbest_present[self.ID_FIT]))

    return gbest_present[self.ID_FIT], self.loss_train
def main(): try: import sklearn if sklearn.__version__ < "0.20": gs.fatal("Package python3-scikit-learn 0.20 or newer is not installed") except ImportError: gs.fatal("Package python3-scikit-learn 0.20 or newer is not installed") try: import pandas as pd except ImportError: gs.fatal("Package python3-pandas 0.25 or newer is not installed") # parser options --------------------------------------------------------------------------------------------------- group = options["group"] training_map = options["training_map"] training_points = options["training_points"] field = options["field"] model_save = options["save_model"] model_name = options["model_name"] hyperparams = { "penalty": options["penalty"], "alpha": options["alpha"], "l1_ratio": options["l1_ratio"], "C": options["c"], "epsilon": options["epsilon"], "min_samples_leaf": options["min_samples_leaf"], "n_estimators": options["n_estimators"], "learning_rate": options["learning_rate"], "subsample": options["subsample"], "max_depth": options["max_depth"], "max_features": options["max_features"], "n_neighbors": options["n_neighbors"], "weights": options["weights"], "hidden_layer_sizes": options["hidden_units"], } cv = int(options["cv"]) group_raster = options["group_raster"] importances = flags["f"] preds_file = options["preds_file"] classif_file = options["classif_file"] fimp_file = options["fimp_file"] param_file = options["param_file"] norm_data = flags["s"] random_state = int(options["random_state"]) load_training = options["load_training"] save_training = options["save_training"] n_jobs = int(options["n_jobs"]) balance = flags["b"] category_maps = option_to_list(options["category_maps"]) # define estimator ------------------------------------------------------------------------------------------------- hyperparams, param_grid = process_param_grid(hyperparams) estimator, mode = predefined_estimators( model_name, random_state, n_jobs, hyperparams ) # remove dict keys that are incompatible for the selected estimator estimator_params = estimator.get_params() param_grid = { key: value for key, value in param_grid.items() if key in estimator_params } scoring, search_scorer = scoring_metrics(mode) # checks of input options ------------------------------------------------------------------------------------------ if ( mode == "classification" and balance is True and model_name not in check_class_weights() ): gs.warning(model_name + " does not support class weights") balance = False if mode == "regression" and balance is True: gs.warning("Balancing of class weights is only possible for classification") balance = False if classif_file: if cv <= 1: gs.fatal( "Output of cross-validation global accuracy requires cross-validation cv > 1" ) if not os.path.exists(os.path.dirname(classif_file)): gs.fatal("Directory for output file {} does not exist".format(classif_file)) # feature importance file selected but no cross-validation scheme used if importances: if sklearn.__version__ < "0.22": gs.fatal("Feature importances calculation requires scikit-learn version >= 0.22") if fimp_file: if importances is False: gs.fatal('Output of feature importance requires the "f" flag to be set') if not os.path.exists(os.path.dirname(fimp_file)): gs.fatal("Directory for output file {} does not exist".format(fimp_file)) # predictions file selected but no cross-validation scheme used if preds_file: if cv <= 1: gs.fatal( "Output of cross-validation predictions requires cross-validation cv > 1" ) if not os.path.exists(os.path.dirname(preds_file)): gs.fatal("Directory for 
output file {} does not exist".format(preds_file)) # define RasterStack ----------------------------------------------------------------------------------------------- stack = RasterStack(group=group) if category_maps is not None: stack.categorical = category_maps # extract training data -------------------------------------------------------------------------------------------- if load_training != "": X, y, cat, class_labels, group_id = load_training_data(load_training) if class_labels is not None: a = pd.DataFrame({"response": y, "labels": class_labels}) a = a.drop_duplicates().values class_labels = {k: v for (k, v) in a} else: gs.message("Extracting training data") if group_raster != "": stack.append(group_raster) if training_map != "": X, y, cat = stack.extract_pixels(training_map) y = y.flatten() with RasterRow(training_map) as src: class_labels = {v: k for (k, v, m) in src.cats} if "" in class_labels.values(): class_labels = None elif training_points != "": X, y, cat = stack.extract_points(training_points, field) y = y.flatten() if y.dtype in (np.object_, np.object): from sklearn.preprocessing import LabelEncoder le = LabelEncoder() y = le.fit_transform(y) class_labels = {k: v for (k, v) in enumerate(le.classes_)} else: class_labels = None # take group id from last column and remove from predictors if group_raster != "": group_id = X[:, -1] X = np.delete(X, -1, axis=1) stack.drop(group_raster) else: group_id = None # check for labelled pixels and training data if y.shape[0] == 0 or X.shape[0] == 0: gs.fatal( "No training pixels or pixels in imagery group " "...check computational region" ) from sklearn.utils import shuffle if group_id is None: X, y, cat = shuffle(X, y, cat, random_state=random_state) else: X, y, cat, group_id = shuffle( X, y, cat, group_id, random_state=random_state ) if save_training != "": save_training_data( save_training, X, y, cat, class_labels, group_id, stack.names ) # cross validation settings ---------------------------------------------------------------------------------------- # inner resampling method (cv=2) from sklearn.model_selection import GridSearchCV, StratifiedKFold, GroupKFold, KFold if any(param_grid) is True: if group_id is None and mode == "classification": inner = StratifiedKFold(n_splits=2, random_state=random_state) elif group_id is None and mode == "regression": inner = KFold(n_splits=2, random_state=random_state) else: inner = GroupKFold(n_splits=2) else: inner = None # outer resampling method (cv=cv) if cv > 1: if group_id is None and mode == "classification": outer = StratifiedKFold(n_splits=cv, random_state=random_state) elif group_id is None and mode == "regression": outer = KFold(n_splits=cv, random_state=random_state) else: outer = GroupKFold(n_splits=cv) # modify estimators that take sample_weights ----------------------------------------------------------------------- if balance is True: from sklearn.utils import compute_class_weight class_weights = compute_class_weight(class_weight="balanced", classes=(y), y=y) fit_params = {"sample_weight": class_weights} else: class_weights = None fit_params = {} # preprocessing ---------------------------------------------------------------------------------------------------- from sklearn.pipeline import Pipeline from sklearn.compose import ColumnTransformer from sklearn.preprocessing import StandardScaler, OneHotEncoder # standardization if norm_data is True and category_maps is None: scaler = StandardScaler() trans = ColumnTransformer( remainder="passthrough", transformers=[("scaling", 
scaler, np.arange(0, stack.count))], ) # one-hot encoding elif norm_data is False and category_maps is not None: enc = OneHotEncoder(handle_unknown="ignore", sparse=False) trans = ColumnTransformer( remainder="passthrough", transformers=[("onehot", enc, stack.categorical)] ) # standardization and one-hot encoding elif norm_data is True and category_maps is not None: scaler = StandardScaler() enc = OneHotEncoder(handle_unknown="ignore", sparse=False) trans = ColumnTransformer( remainder="passthrough", transformers=[ ("onehot", enc, stack.categorical), ("scaling", scaler, np.setxor1d( range(stack.count), stack.categorical).astype('int')), ], ) # combine transformers if norm_data is True or category_maps is not None: estimator = Pipeline([("preprocessing", trans), ("estimator", estimator)]) param_grid = wrap_named_step(param_grid) fit_params = wrap_named_step(fit_params) if any(param_grid) is True: estimator = GridSearchCV( estimator=estimator, param_grid=param_grid, scoring=search_scorer, n_jobs=n_jobs, cv=inner, ) # estimator training ----------------------------------------------------------------------------------------------- gs.message(os.linesep) gs.message(("Fitting model using " + model_name)) if balance is True and group_id is not None: estimator.fit(X, y, groups=group_id, **fit_params) elif balance is True and group_id is None: estimator.fit(X, y, **fit_params) else: estimator.fit(X, y) # message best hyperparameter setup and optionally save using pandas if any(param_grid) is True: gs.message(os.linesep) gs.message("Best parameters:") optimal_pars = [ (k.replace("estimator__", "").replace("selection__", "") + " = " + str(v)) for (k, v) in estimator.best_params_.items() ] for i in optimal_pars: gs.message(i) if param_file != "": param_df = pd.DataFrame(estimator.cv_results_) param_df.to_csv(param_file) # cross-validation ------------------------------------------------------------------------------------------------- if cv > 1: from sklearn.metrics import classification_report from sklearn import metrics if ( mode == "classification" and cv > np.histogram(y, bins=np.unique(y))[0].min() ): gs.message(os.linesep) gs.fatal( "Number of cv folds is greater than number of " "samples in some classes" ) gs.message(os.linesep) gs.message("Cross validation global performance measures......:") if ( mode == "classification" and len(np.unique(y)) == 2 and all([0, 1] == np.unique(y)) ): scoring["roc_auc"] = metrics.roc_auc_score from sklearn.model_selection import cross_val_predict preds = cross_val_predict( estimator, X, y, group_id, cv=outer, n_jobs=n_jobs, fit_params=fit_params ) test_idx = [test for train, test in outer.split(X, y)] n_fold = np.zeros((0,)) for fold in range(outer.get_n_splits()): n_fold = np.hstack((n_fold, np.repeat(fold, test_idx[fold].shape[0]))) preds = {"y_pred": preds, "y_true": y, "cat": cat, "fold": n_fold} preds = pd.DataFrame(data=preds, columns=["y_pred", "y_true", "cat", "fold"]) gs.message(os.linesep) gs.message("Global cross validation scores...") gs.message(os.linesep) gs.message("Metric \t Mean \t Error") for name, func in scoring.items(): score_mean = ( preds.groupby("fold") .apply(lambda x: func(x["y_true"], x["y_pred"])) .mean() ) score_std = ( preds.groupby("fold") .apply(lambda x: func(x["y_true"], x["y_pred"])) .std() ) gs.message( name + "\t" + str(score_mean.round(3)) + "\t" + str(score_std.round(3)) ) if mode == "classification": gs.message(os.linesep) gs.message("Cross validation class performance measures......:") report_str = classification_report( 
y_true=preds["y_true"], y_pred=preds["y_pred"], sample_weight=class_weights, output_dict=False, ) report = classification_report( y_true=preds["y_true"], y_pred=preds["y_pred"], sample_weight=class_weights, output_dict=True, ) report = pd.DataFrame(report) gs.message(report_str) if classif_file != "": report.to_csv(classif_file, mode="w", index=True) # write cross-validation predictions to csv file if preds_file != "": preds.to_csv(preds_file, mode="w", index=False) text_file = open(preds_file + "t", "w") text_file.write('"Real", "Real", "integer", "integer"') text_file.close() # feature importances ---------------------------------------------------------------------------------------------- if importances is True: from sklearn.inspection import permutation_importance fimp = permutation_importance( estimator, X, y, scoring=search_scorer, n_repeats=5, n_jobs=n_jobs, random_state=random_state, ) feature_names = deepcopy(stack.names) feature_names = [i.split("@")[0] for i in feature_names] fimp = pd.DataFrame( { "feature": feature_names, "importance": fimp["importances_mean"], "std": fimp["importances_std"], } ) gs.message(os.linesep) gs.message("Feature importances") gs.message("Feature" + "\t" + "Score") for index, row in fimp.iterrows(): gs.message( row["feature"] + "\t" + str(row["importance"]) + "\t" + str(row["std"]) ) if fimp_file != "": fimp.to_csv(fimp_file, index=False) # save the fitted model import joblib joblib.dump((estimator, y, class_labels), model_save)
import numpy as np a = np.array([1, 2, 3]) b = np.array([3, 4, 5]) print("a: ", a) print("b: ", b) print("1차원 합집합: ", np.union1d(a, b)) print("1차원 교집합: ", np.intersect1d(a, b)) print("1차원 차집합: ", np.setdiff1d(a, b)) print("1차원 대칭차집합(합집합 - 교집합): ", np.setxor1d(a, b)) print("난수 발생(1 개): ", np.random.random(1)) print("난수 발생(3 개): ", np.random.random(3)) print("평균: 0 / 표준편자: 1 / 갯수: 1") print("난수 발생(1 개): ", np.random.normal(0, 1, 1)) print("평균: 1 / 표준편자: 1 / 갯수: 3") print("난수 발생(3 개): ", np.random.normal(1, 1, 3)) # 출처: http://expert0226.tistory.com/357 [여름나라겨울이야기]
def __getitem__(self, i): for n, (image_path, label_path) in enumerate( zip( self.image_path_list[i * self.batch_size:(i + 1) * self.batch_size], self.label_path_list[i * self.batch_size:(i + 1) * self.batch_size])): image = cv2.imread(image_path, 1) label = cv2.imread(label_path, 0) labels = np.unique(label) if self.blur and random.randint(0, 1): image = cv2.GaussianBlur(image, (self.blur, self.blur), 0) if self.resize_shape and not self.crop_shape: image = cv2.resize(image, self.resize_shape) label = cv2.resize(label, self.resize_shape, interpolation=cv2.INTER_NEAREST) if self.crop_shape: image, label = _random_crop(image, label, self.crop_shape) # Do augmentation if self.horizontal_flip and random.randint(0, 1): image = cv2.flip(image, 1) label = cv2.flip(label, 1) if self.vertical_flip and random.randint(0, 1): image = cv2.flip(image, 0) label = cv2.flip(label, 0) if self.brightness: factor = 1.0 + random.gauss(mu=0.0, sigma=self.brightness) if random.randint(0, 1): factor = 1.0 / factor table = np.array([((i / 255.0)**factor) * 255 for i in np.arange(0, 256)]).astype(np.uint8) image = cv2.LUT(image, table) if self.rotation: angle = random.gauss(mu=0.0, sigma=self.rotation) else: angle = 0.0 if self.zoom: scale = random.gauss(mu=1.0, sigma=self.zoom) else: scale = 1.0 if self.rotation or self.zoom: M = cv2.getRotationMatrix2D( (image.shape[1] // 2, image.shape[0] // 2), angle, scale) image = cv2.warpAffine(image, M, (image.shape[1], image.shape[0])) label = cv2.warpAffine(label, M, (label.shape[1], label.shape[0])) if self.histeq: # and convert to RGB img_yuv = cv2.cvtColor(image, cv2.COLOR_BGR2YUV) img_yuv[:, :, 0] = clahe.apply(img_yuv[:, :, 0]) image = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR) # to BGR label = label.astype('int32') for j in np.setxor1d(np.unique(label), labels): label[label == j] = self.n_classes y = label.flatten() y[y > (self.n_classes - 1)] = self.n_classes self.Y[n] = np.expand_dims(y, -1) self.F[n] = (self.Y[n] != 0).astype( 'float32') # get all pixels that aren't background valid_pixels = self.F[n][ self.Y[n] != self. n_classes] # get all pixels (bg and foregroud) that aren't void u_classes = np.unique(valid_pixels) class_weights = class_weight.compute_class_weight( 'balanced', u_classes, valid_pixels) class_weights = { class_id: w for class_id, w in zip(u_classes, class_weights) } if len(class_weights) == 1: # no bg\no fg if 1 in u_classes: class_weights[0] = 0. else: class_weights[1] = 0. elif not len(class_weights): class_weights[0] = 0. class_weights[1] = 0. sw_valid = np.ones(y.shape) np.putmask(sw_valid, self.Y[n] == 0, class_weights[0]) # background weights np.putmask(sw_valid, self.F[n], class_weights[1]) # foreground wegihts np.putmask(sw_valid, self.Y[n] == self.n_classes, 0) self.F_SW[n] = sw_valid self.X[n] = image # Create adaptive pixels weights filt_y = y[y != self.n_classes] u_classes = np.unique(filt_y) if len(u_classes): class_weights = class_weight.compute_class_weight( 'balanced', u_classes, filt_y) class_weights = { class_id: w for class_id, w in zip(u_classes, class_weights) } class_weights[self.n_classes] = 0. for yy in u_classes: np.putmask(self.SW[n], y == yy, class_weights[yy]) np.putmask(self.SW[n], y == self.n_classes, 0) sample_dict = {'pred_mask': self.SW} return self.X, self.Y, sample_dict
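# A small illustrative sketch of the setxor1d step inside __getitem__ above: class
# ids that appear in only one of the two label sets (before vs. after augmentation,
# e.g. introduced by warping) are remapped to the void class `n_classes`. The data
# here is made up for illustration only.
import numpy as np

n_classes = 21
labels_before = np.array([0, 3, 7])       # classes present before augmentation
label = np.array([[0, 3], [7, 9]])        # class 9 appeared after warping
for j in np.setxor1d(np.unique(label), labels_before):
    label[label == j] = n_classes         # mark unexpected ids as void
print(label)                              # [[ 0  3]
                                          #  [ 7 21]]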
def __init__(self, folder='/workspace/datasets/', mode='train', n_classes=21, batch_size=1, resize_shape=None, validation_split=.1, seed=7, crop_shape=(640, 320), horizontal_flip=True, blur=0, vertical_flip=0, brightness=0.1, rotation=5.0, zoom=0.1, do_ahisteq=True): self.blur = blur self.histeq = do_ahisteq self.image_path_list = sorted( glob.glob(os.path.join(folder, 'JPEGImages', 'train', '*'))) self.label_path_list = sorted( glob.glob(os.path.join(folder, 'SegmentationClassAug', '*'))) np.random.seed(seed) n_images_to_select = round( len(self.image_path_list) * validation_split) x = np.random.permutation(len( self.image_path_list))[:n_images_to_select] if mode == 'train': x = np.setxor1d(x, np.arange(len(self.image_path_list))) self.image_path_list = [self.image_path_list[j] for j in x] self.label_path_list = [self.label_path_list[j] for j in x] if mode == 'test': self.image_path_list = sorted( glob.glob(os.path.join(folder, 'JPEGImages', 'test', '*')))[:100] self.mode = mode self.n_classes = n_classes self.batch_size = batch_size self.resize_shape = resize_shape self.crop_shape = crop_shape self.horizontal_flip = horizontal_flip self.vertical_flip = vertical_flip self.brightness = brightness self.rotation = rotation self.zoom = zoom # Preallocate memory if self.crop_shape: self.X = np.zeros((batch_size, crop_shape[1], crop_shape[0], 3), dtype='float32') self.SW = np.zeros((batch_size, crop_shape[1] * crop_shape[0]), dtype='float32') self.Y = np.zeros((batch_size, crop_shape[1] * crop_shape[0], 1), dtype='float32') self.F = np.zeros((batch_size, crop_shape[1] * crop_shape[0], 1), dtype='float32') self.F_SW = np.zeros((batch_size, crop_shape[1] * crop_shape[0]), dtype='float32') elif self.resize_shape: self.X = np.zeros( (batch_size, resize_shape[1], resize_shape[0], 3), dtype='float32') self.SW = np.zeros((batch_size, resize_shape[1] * resize_shape[0]), dtype='float32') self.Y = np.zeros( (batch_size, resize_shape[1] * resize_shape[0], 1), dtype='float32') self.F = np.zeros( (batch_size, resize_shape[1] * resize_shape[0], 1), dtype='float32') self.F_SW = np.zeros( (batch_size, resize_shape[1] * resize_shape[0]), dtype='float32') else: raise Exception('No image dimensions specified!')
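# A minimal sketch of the split performed in __init__ above: draw validation indices
# at random, then take the complement with setxor1d to obtain the training indices.
import numpy as np

n_images = 10
np.random.seed(7)
val_idx = np.random.permutation(n_images)[:3]
train_idx = np.setxor1d(val_idx, np.arange(n_images))  # everything not in val_idx
assert len(np.intersect1d(train_idx, val_idx)) == 0
assert len(train_idx) + len(val_idx) == n_images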
def within_index(self, *args, **kwargs):
    A = self.A.within_index(*args, **kwargs)
    B = self.B.within_index(*args, **kwargs)
    return setxor1d(A, B, assume_unique=True)

def get_not_indexes(a, indices):
    """Return the positions of `a` that are not in `indices`; only works for 1D."""
    # np.indices(a.shape) has shape (1, len(a)); setxor1d flattens its inputs,
    # so this yields the complement of `indices` within range(len(a)).
    ia = np.indices(a.shape)
    not_indices = np.setxor1d(ia, indices)
    return not_indices
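# Example usage (illustrative): for a 1-D array this returns the complement of
# `indices`; np.setdiff1d(np.arange(len(a)), indices) would express the same intent.
import numpy as np

a = np.arange(6) * 10
print(get_not_indexes(a, [1, 4]))  # -> [0 2 3 5]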
'''
@Author: Sankar
@Date: 2021-04-14 08:49:25
@Last Modified by: Sankar
@Last Modified time: 2021-04-14 08:55:09
@Title : Numpy_Python-14
'''
'''
Write a Python program to find the set exclusive-or of two arrays.
Set exclusive-or will return the sorted, unique values that are in only one
(not both) of the input arrays.
Array1: [ 0 10 20 40 60 80]
Array2: [10, 30, 40, 50, 70]
Unique values that are in only one (not both) of the input arrays:
[ 0 20 30 50 60 70 80]
'''
import numpy as np

arr1 = np.array([0, 10, 20, 40, 60, 80])
arr2 = np.array([10, 30, 40, 50, 70])
print(np.setxor1d(arr1, arr2))
# returns the values together with their indexes
ans = s1[~s1.isin(s2)]
# returns the values only
ans2 = np.setdiff1d(s1, s2, assume_unique=False)
print(ans)

# In[20]:
# 7. Get the non-overlapping elements of two Series objects
s1 = pd.Series([1, 2, 3, 4, 5])
s2 = pd.Series([4, 5, 6, 7, 8])
# returns the values together with their indexes
# build the union Series without duplicates
s_union = pd.Series(np.union1d(s1, s2))
# get the intersecting values
s_intersect = pd.Series(np.intersect1d(s1, s2))
# keep all values except the intersecting ones
ans = s_union[~s_union.isin(s_intersect)]
# returns the values only
ans2 = np.setxor1d(s1, s2, assume_unique=False)
print(ans)

# In[ ]:
def disjoint_sets(subtraceA, subtraceB):
    x = len(subtraceA.pages) - len(subtraceB.pages)
    y = len(np.setxor1d(subtraceA.pages, subtraceB.pages))
    return sqrt(x**2 + y**2)
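# A minimal usage sketch for disjoint_sets above, assuming a subtrace is any object
# with a `pages` array and that `sqrt` comes from math; the Subtrace namedtuple is a
# hypothetical stand-in used only for illustration.
from collections import namedtuple
from math import sqrt
import numpy as np

Subtrace = namedtuple("Subtrace", "pages")
a = Subtrace(pages=np.array([1, 2, 3, 4]))
b = Subtrace(pages=np.array([3, 4, 5]))
# size difference is 1, symmetric difference {1, 2, 5} has length 3,
# so the distance is sqrt(1 + 9) ~= 3.162
print(disjoint_sets(a, b))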
def generate_data(cur_target_class_ids, full_target_class_ids, data, labels, refining, seed=123): np.random.seed(seed) num_of_each_class = 5000 n_final_classes_including_other = 10 full_class_ids = np.arange(1, n_final_classes_including_other + 1) if refining == True: other_class_ids = np.setxor1d(full_class_ids, cur_target_class_ids) else: other_class_ids = np.setxor1d(full_class_ids, full_target_class_ids) labels_copy = np.copy(labels) target_class_indices = np.array([], dtype='int32') for class_id in cur_target_class_ids: target_class_indices = np.append( target_class_indices, np.argwhere(labels_copy == class_id - 1)[:, 0]) # TODO: temp code np.random.shuffle(target_class_indices) print(target_class_indices[:10]) # create new train and test datasets and labels target_class_data = data[target_class_indices, :] target_class_labels = np.squeeze(labels_copy[target_class_indices, :]) target_class_labels_copy = np.copy(target_class_labels) # Reasign the index of target-classes, starting from 1 for i, class_id in enumerate(cur_target_class_ids): target_class_labels[np.argwhere(target_class_labels_copy == class_id - 1)[:, 0]] = i + 1 # Other-class all_other_class_indices = np.array([], dtype='int32') for class_id in other_class_ids: if class_id == other_class_ids[0]: #TODO all_other_class_indices = np.append( all_other_class_indices, np.argwhere(labels_copy == class_id - 1)[:, 0]) print('current other class: ' + str(other_class_ids)) print('current target class: ' + str(cur_target_class_ids)) print('all target target class: ' + str(full_target_class_ids)) print('all class: ' + str(full_class_ids)) print('all other class indices: ' + str(len(all_other_class_indices))) other_class_indices = np.random.choice(all_other_class_indices, num_of_each_class) other_class_data = data[other_class_indices, :] # set 'other' label to zero other_class_labels = np.array([0] * num_of_each_class) print(target_class_labels.shape, other_class_labels.shape) selected_data = np.concatenate((target_class_data, other_class_data)) selected_labels = np.concatenate((target_class_labels, other_class_labels)) return (selected_data, selected_labels)
#%% data preparation
# load the matlab data
earth_data = sio.loadmat('head.mat')
faaut = np.array(earth_data['faaut']).reshape(-1).astype('int')
faman = np.array(earth_data['faman']).reshape(-1).astype('int')
off = np.array(earth_data['off']).reshape(-1).astype('int')
rx = np.array(earth_data['rx']).reshape(-1).astype('int')
ry = np.array(earth_data['ry']).reshape(-1).astype('int')
sx = np.array(earth_data['sx']).reshape(-1).astype('int')
sy = np.array(earth_data['sy']).reshape(-1).astype('int')

# separate the uncertain and certain data indices
uct_idx = np.where(faman < 0)[0]              # uncertain index
uct_pct = len(uct_idx) * 1.0 / len(faman)     # uncertain percentage
idx_all = np.arange(len(faman))
cet_idx = np.setxor1d(idx_all, uct_idx)       # certain index
#
all_data = np.zeros((len(faaut), 7))
all_data[:, 0] = off
all_data[:, 1] = rx
all_data[:, 2] = ry
all_data[:, 3] = sx
all_data[:, 4] = sy
all_data[:, 5] = faaut
all_data[:, 6] = faman
all_data_df = pd.DataFrame(
    all_data, columns=['off', 'rx', 'ry', 'sx', 'sy', 'faaut', 'faman'])
#
certain_data = all_data[cet_idx, :]
uncertain_data = all_data[uct_idx, :]
#
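# A small sketch of the complement-index idiom used above, with made-up data.
# np.setxor1d(idx_all, uct_idx) works here because uct_idx is a subset of idx_all;
# np.setdiff1d(idx_all, uct_idx) states the same intent more directly.
import numpy as np

faman_demo = np.array([3, -1, 7, -2, 5])
uncertain = np.where(faman_demo < 0)[0]           # array([1, 3])
everything = np.arange(len(faman_demo))
certain = np.setxor1d(everything, uncertain)      # array([0, 2, 4])
assert np.array_equal(certain, np.setdiff1d(everything, uncertain))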
def kmeans(path): folders = os.listdir( path) #Se crea una lista con los nombres de todas las carpetas dentro folders = np.asarray(folders) files = [ ] #inicializa lista para los nombres de los archivos pertenecientes a cada carpeta for f in folders: provisional_path = path + "/" + f #Se inicializa variable con la dirección anterior de la variable 'path' + el nombre de una carpeta 'x' dentro de StopSearch_2011_2017 n_classes = get_n_classes(provisional_path) files.append([provisional_path, n_classes]) files = np.asarray(files) count = 0 file_index = 0 #results=[] final_tags = [] for i in range(0, len(files)): file_results = [] prom_folds = [] for K in range(3, 9): print("i: " + str(i) + ", K: " + str(K)) file_name = folders[i] X = np.asarray(get_data(files[i][0])) X = soft_max(X) #print(X) #time.sleep(5) file_tags = get_tags(files[i][0]) X_shape = np.shape(X) k_index = np.random.choice( int(X_shape[0]), int(K), replace=False ) #Selecciona indices aleatorios de elementos de la matriz original "X" centroids = X[k_index, :] #Asigna centroides aleatorios matriz_indx = matriz_indices(len(X), len(centroids)) matriz_distancias = distances_matrix(X, centroids) sort_index(matriz_distancias, matriz_indx) clusters = iniciar_clusters(X, matriz_indx, K) old_centroids = centroids new_centroids = calculate_new_centroids(centroids, clusters) j = 0 while (olds_vs_news(old_centroids, new_centroids)): matriz_indx = matriz_indices(len(X), len(centroids)) matriz_distancias = distances_matrix(X, centroids) sort_index(matriz_distancias, matriz_indx) clusters = iniciar_clusters(X, matriz_indx, K) old_centroids = centroids new_centroids = calculate_new_centroids(centroids, clusters) centroids = new_centroids j += 1 #file_results.append(db_index(np.asarray(centroids),np.asarray(clusters),len(X))) #time.sleep(5) print("\n\n-------------------------------------\n\n") for element in matriz_indx: final_tags.append(element[0]) data = X #time.sleep(5) k_folds = 5 all_index = np.arange(0, len(data)) min_limit, max_limit = 0, mt.ceil(len(data) / float(k_folds)) indices_list = np.arange(min_limit, max_limit) radios_list = radios(centroids) Y = norm_Y(file_tags) const_limit = mt.ceil(len(data) / float(k_folds)) cont_mcc = 0 for kf in range(0, k_folds): print("fold: " + str(kf + 1)) #print(str(min_limit)+","+str(max_limit)) fold = np.arange(int(min_limit), int(max_limit)) #print(fold) i_proof = data[fold, :] i_proof_y = Y[fold] i_proof = np.asarray(i_proof) i_proof_y = np.asarray(i_proof_y) train_index = np.setxor1d(fold, all_index) i_train = data[train_index, :] i_train_y = Y[train_index] phi_1 = [] phi_1.append( np.reshape(np.kron(np.ones((len(i_train), 1)), [1]), (len(i_train)))) #ADD BIAS for cn in range(0, len(centroids)): phi_1.append(RBF(i_train, centroids[cn], radios_list[cn])) phi_1 = np.asarray(phi_1) phi_1 = np.linalg.pinv(phi_1) phi_1 = np.transpose(phi_1) #print(i_train_y) m_target = Ytarget(Y, i_train_y) """print(m_target) print("len: "+str(len(m_target))) time.sleep(5)""" W = M_1xM_2(phi_1, m_target) W = np.transpose(W) phi_2 = [] phi_2.append( np.reshape(np.kron(np.ones((len(i_proof), 1)), [1]), (len(i_proof)))) for cn in range(0, len(centroids)): phi_2.append(RBF(i_proof, centroids[cn], radios_list[cn])) phi_2 = np.asarray(phi_2) phi_2 = np.transpose(phi_2) y_net = [] #np_unhs=list(i_proof_y) len_set = len(list(set(Y))) #print(len_set) for d in range(0, len(phi_2)): x = phi_2[d] x = x.reshape(1, len(x)) x = np.transpose(x) z = M_1xM_2(W, x) x_tags = matriz_indices(1, len_set) z = np.transpose(z) sort_index(z, 
x_tags) y_net.append(x_tags[0][len(x_tags) - 1]) y_net = np.asarray(y_net) cont_mcc += matthews_corrcoef(i_proof_y, y_net) min_limit = max_limit max_limit += const_limit if (max_limit > len(data)): max_limit = len(data) - 1 prom_folds.append(cont_mcc / 5) #GRAFICAR PROM_FOLDS a este nivel prom_folds = np.asarray(prom_folds) labels_k = ["k-3", "k-4", "k-5", "k-6", "k-7", "k-8"] #labels_x=[] index_mcc = np.arange(len(labels_k)) #print("shape folds") #print(np.shape(prom_folds)) #print(prom_folds) time.sleep(5) plt.subplots(figsize=(9, 6)) plt.xticks(index_mcc, labels_k, rotation="vertical") #plt.yticks(axis_y,labels_y) n = 0 plt.plot(prom_folds, color="r", marker="o", linestyle='--', label="MCC") plt.title(folders[i]) plt.savefig(folders[i] + ".png") #plt.show() plt.close()
# Write a NumPy program to find the set exclusive-or of two arrays.
# Set exclusive-or will return the sorted, unique values that are in only one
# (not both) of the input arrays.
import numpy as np

array1 = np.array([0, 10, 20, 40, 60, 80])
print("array1:\n", array1)
array2 = np.array([10, 30, 40, 50, 70, 90])
print("array2:\n", array2)
print("resultant array\n", np.setxor1d(array1, array2))
import numpy as np

arr = np.array([1, 1, 1, 2, 3, 4, 5, 5, 6, 7])
x = np.unique(arr)
print(x)

arr1 = np.array([1, 2, 3, 4])
arr2 = np.array([3, 4, 5, 6])
newarr = np.union1d(arr1, arr2)
print(newarr)

arr1 = np.array([1, 2, 3, 4])
arr2 = np.array([3, 4, 5, 6])
newarr = np.intersect1d(arr1, arr2, assume_unique=True)
print(newarr)

set1 = np.array([1, 2, 3, 4])
set2 = np.array([3, 4, 5, 6])
newarr = np.setdiff1d(set1, set2, assume_unique=True)
print(newarr)

set1 = np.array([1, 2, 3, 4])
set2 = np.array([3, 4, 5, 6])
newarr = np.setxor1d(set1, set2, assume_unique=True)
print(newarr)
total = round(sum(cargas) / 3)  # Target value for each group's sum.
numero_grupos = 3

# Build the groups
resultados = [None] * numero_grupos
for i in range(1, numero_grupos):
    data = grupos_suma(cargas, total)
    seen = set()
    result = []
    for d in data:
        # Drop groups that are only permutations of another group.
        if frozenset(d) not in seen:
            result.append(d)
            seen.add(frozenset(d))
    # Many groups satisfy the target sum; only the first one is kept.
    resultados[i] = result[0]
    cargas = np.setxor1d(list(cargas), resultados[i])  # Loads not yet assigned.

print()
print(f'Cargas: {cargas_original}')
print()
print("Grupos:")
for i in range(1, numero_grupos):
    print(resultados[i])
    print(f'Suma: {round(sum(resultados[i]), 2)}')
def setminus(a, b):
    a = np.array(a)
    b = np.array(b)
    intersect = np.intersect1d(a, b)
    diff = np.setxor1d(a, intersect)
    return diff
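# Note on setminus above: because `intersect` is a subset of the unique values of `a`,
# np.setxor1d(a, intersect) leaves exactly the elements of `a` not in `b`, i.e. the
# same result as np.setdiff1d. A quick check with toy data:
import numpy as np

a = [1, 2, 2, 3, 4]
b = [3, 4, 5]
assert np.array_equal(setminus(a, b), np.setdiff1d(a, b))  # both give [1 2]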
# Set operations
if __name__ == '__main__':
    # 5.6 Unique values and set logic
    names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
    np.unique(names)  # unique values, returned sorted

    # set functions
    values = np.array([6, 0, 0, 3, 2, 5, 6])
    np.in1d(values, [2, 3, 6])  # whether each element of values is in [2, 3, 6];
    # returns array([ True, False, False,  True,  True, False,  True], dtype=bool)
    np.unique(values)               # unique elements
    np.intersect1d(values, values)  # intersection
    np.union1d(values, values)      # union
    np.setdiff1d(values, values)    # difference (first minus second)
    np.setxor1d(values, values)     # symmetric difference

# Part 2: advanced NumPy usage
# 1. Checking an ndarray's dtype
if __name__ == '__main__':
    ints = np.ones(10, dtype=np.uint16)
    floats = np.ones(10, dtype=np.float32)
    np.issubdtype(ints.dtype, np.integer)  # check the data type
    np.issubdtype(floats.dtype, np.floating)
    np.float64.mro()  # all parent classes of np.float64

# 2. Reshaping
if __name__ == '__main__':
print(type(arr), arr)

# append values at the end of an array
arr = array("i", [1, 5, 9, 6, 5, 4, 8])
print(arr)
arr = np.append(arr, [55, 80])
print(arr)

# intersection of two arrays
array1 = np.array([1, 5, 8, 9, 6, 2, 4])
array2 = np.array([8, 9, 45, 65])
print(np.intersect1d(array1, array2))

# difference of two arrays
array1 = np.array([1, 5, 8, 9, 6, 2, 4])
array2 = np.array([8, 9, 45, 65])
print(np.setdiff1d(array1, array2))

# symmetric difference: values unique to either array
array1 = np.array([1, 5, 8, 9, 6, 2, 4])
array2 = np.array([8, 9, 45, 65])
print(np.setxor1d(array1, array2))

# compare two arrays element-wise
a = [1, 2]
b = [4, 5]
print(np.greater_equal(a, b))
print(np.greater(a, b))
print(np.less_equal(a, b))
print(np.less(a, b))
def findDuplicateVectors(vec, tol=vTol, equivPM=False): """ Find vectors in an array that are equivalent to within a specified tolerance USAGE: eqv = DuplicateVectors(vec, *tol) INPUT: 1) vec is n x m, a double array of m horizontally concatenated n-dimensional vectors. *2) tol is 1 x 1, a scalar tolerance. If not specified, the default tolerance is 1e-14. *3) set equivPM to True if vec and -vec are to be treated as equivalent OUTPUT: 1) eqv is 1 x p, a list of p equivalence relationships. NOTES: Each equivalence relationship is a 1 x q vector of indices that represent the locations of duplicate columns/entries in the array vec. For example: | 1 2 2 2 1 2 7 | vec = | | | 2 3 5 3 2 3 3 | eqv = [[1x2 double] [1x3 double]], where eqv[0] = [0 4] eqv[1] = [1 3 5] """ vlen = vec.shape[1] vlen0 = vlen orid = np.asarray(list(range(vlen)), dtype="int") torid = orid.copy() tvec = vec.copy() eqv = [] eqvTot = 0 uid = 0 ii = 1 while vlen > 1 and ii < vlen0: dupl = np.tile(tvec[:, 0], (vlen, 1)) if not equivPM: diff = abs(tvec - dupl.T).sum(0) match = abs(diff[1:]) <= tol # logical to find duplicates else: diffn = abs(tvec - dupl.T).sum(0) matchn = abs(diffn[1:]) <= tol diffp = abs(tvec + dupl.T).sum(0) matchp = abs(diffp[1:]) <= tol match = matchn + matchp kick = np.hstack([True, match]) # pick self too if kick.sum() > 1: eqv += [torid[kick].tolist()] eqvTot = np.hstack([eqvTot, torid[kick]]) uid = np.hstack([uid, torid[kick][0]]) cmask = np.ones((vlen, )) cmask[kick] = 0 cmask = cmask != 0 tvec = tvec[:, cmask] torid = torid[cmask] vlen = tvec.shape[1] ii += 1 if len(eqv) == 0: eqvTot = [] uid = [] else: eqvTot = eqvTot[1:].tolist() uid = uid[1:].tolist() # find all single-instance vectors singles = np.sort(np.setxor1d(eqvTot, list(range(vlen0)))) # now construct list of unique vector column indices uid = np.int_(np.sort(np.union1d(uid, singles))).tolist() # make sure is a 1D list if not hasattr(uid, '__len__'): uid = [uid] return eqv, uid
def arithmetical_operation(self, a, operation, in_place=False): """ Performs given arithmetical operation with :math:`a` operand, the operation can be either performed on a copy or in-place. Parameters ---------- a : numeric or ndarray or Signal Operand. operation : object Operation to perform. in_place : bool, optional Operation happens in place. Returns ------- Signal Continuous signal. Examples -------- Adding a single *numeric* variable: >>> range_ = np.linspace(10, 100, 10) >>> signal_1 = Signal(range_) >>> print(signal_1) [[ 0. 10.] [ 1. 20.] [ 2. 30.] [ 3. 40.] [ 4. 50.] [ 5. 60.] [ 6. 70.] [ 7. 80.] [ 8. 90.] [ 9. 100.]] >>> print(signal_1.arithmetical_operation(10, '+', True)) [[ 0. 20.] [ 1. 30.] [ 2. 40.] [ 3. 50.] [ 4. 60.] [ 5. 70.] [ 6. 80.] [ 7. 90.] [ 8. 100.] [ 9. 110.]] Adding an *array_like* variable: >>> a = np.linspace(10, 100, 10) >>> print(signal_1.arithmetical_operation(a, '+', True)) [[ 0. 30.] [ 1. 50.] [ 2. 70.] [ 3. 90.] [ 4. 110.] [ 5. 130.] [ 6. 150.] [ 7. 170.] [ 8. 190.] [ 9. 210.]] Adding a :class:`colour.continuous.Signal` class: >>> signal_2 = Signal(range_) >>> print(signal_1.arithmetical_operation(signal_2, '+', True)) [[ 0. 40.] [ 1. 70.] [ 2. 100.] [ 3. 130.] [ 4. 160.] [ 5. 190.] [ 6. 220.] [ 7. 250.] [ 8. 280.] [ 9. 310.]] """ operation, ioperator = { '+': (add, iadd), '-': (sub, isub), '*': (mul, imul), '/': (div, idiv), '**': (pow, ipow) }[operation] if in_place: if isinstance(a, Signal): self[self._domain] = operation(self._range, a[self._domain]) exclusive_or = np.setxor1d(self._domain, a.domain) self[exclusive_or] = np.full(exclusive_or.shape, np.nan) else: self.range = ioperator(self.range, a) return self else: copy = ioperator(self.copy(), a) return copy
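# A standalone sketch (not using the colour-science Signal API) of the setxor1d step
# in arithmetical_operation above: after combining two signals, domain points covered
# by only one of the two signals are marked as undefined (NaN). Data is made up.
import numpy as np

domain_a = np.array([0., 1., 2., 3.])
domain_b = np.array([2., 3., 4., 5.])
only_one = np.setxor1d(domain_a, domain_b)      # [0. 1. 4. 5.]
values = dict.fromkeys(np.union1d(domain_a, domain_b), 0.0)
for x in only_one:
    values[x] = np.nan                          # undefined where the domains don't overlap
print(values)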
# set arrays should only be 1-d arrays.
import numpy as np

x1 = np.array([1, 1, 1, 3, 4, 4, 6, 6, 8, 4, 3, 5, 7, 3, 2, 5, 6])
x2 = np.unique(x1)  # finding unique elements of the array
print(x2)

x3 = np.array([1, 1, 1, 3, 4, 4, 6, 6])
x4 = np.array([8, 4, 3, 5, 7, 3, 2, 5, 6])
x5 = np.union1d(x3, x4)  # unique elements from both arrays
print(x5)

x6 = np.array([1, 1, 1, 3, 4, 4, 6, 6])
x7 = np.array([8, 4, 3, 5, 7, 3, 2, 5, 6])
# assume_unique=True speeds up the computation, but it is only safe when both
# inputs really are unique; these arrays contain duplicates, so the result is
# not guaranteed to be correct.
x8 = np.intersect1d(x6, x7, assume_unique=True)  # values present in both arrays
print(x8)

y1 = np.array([1, 1, 1, 3, 4, 4, 6, 6])
y2 = np.array([8, 4, 3, 5, 7, 3, 2, 5, 6])
# values of the first array that are not present in the second
y3 = np.setdiff1d(y1, y2, assume_unique=True)
print(y3)

y4 = np.array([1, 1, 1, 3, 4, 4, 6, 6])
y5 = np.array([8, 4, 3, 5, 7, 3, 2, 5, 6])
# values present in only one (not both) of the arrays; again, assume_unique=True
# is unsafe here because the inputs contain duplicates
y6 = np.setxor1d(y4, y5, assume_unique=True)
print(y6)
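# Demonstration (illustrative, current NumPy behaviour) of why assume_unique=True is
# unsafe above: repeats within one input are treated like cross-array matches and get
# dropped, so the result is wrong. Deduplicate first or keep the default.
import numpy as np

p = np.array([1, 1, 2, 3])
q = np.array([2, 4, 4])
print(np.setxor1d(p, q, assume_unique=True))  # [3]  -- 1 and 4 wrongly dropped
print(np.setxor1d(p, q))                      # [1 3 4]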
def pointsFromShapes(shapes, bounds, dx=10.0, nmax=None, Nsamp=None, touch_center=True): """Get yes/no points from shapefile input - same as sampleFromShapes but without class balance or separation of test and train, only samples in box enclosing the polygons :param shapes: Sequence of projected shapes. :param bounds: Tuple of xmin, ymin, xmax, ymax, in lat/lon coordinates, only will accept points from within these bounds :param dx: resolution of sampling in X and Y (meters), must be a round number of meters :param nmax: if not None, maximum allowed number of mesh points in X and Y together (nrows*ncols). Overrides dx. :param Nsamp: if not None, maximum number of total samples, keeps proportion of yes's and no's the same :param touch_center: Boolean indicating whether presence of polygon in each grid cell is enough to turn that into a yes pixel. Setting this to false presumes that the dx is relatively large, such that creating a grid at that resolution will not tax the resources of the system. :returns: - sequence of coordinates in lat/lon for: YesPoints, NoPoints - numpy array of mesh column centers - numpy array of mesh row centers - PyProj object defining orthographic projection of xy points """ xmin, ymin, xmax, ymax = bounds shptype = shapes[0]['geometry']['type'] if shptype not in ['Polygon']: raise Exception('Only polygon data types supported!') #Get the shapes projected into an orthographic projection centered on the data pshapes, proj = getProjectedShapes(shapes, xmin, xmax, ymin, ymax) # Get the projected bounds project = partial(pyproj.transform, pyproj.Proj(proj='latlong', datum='WGS84'), proj) bbPoly = Polygon(((xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin))) bbPolyproj = transform(project, bbPoly) if Nsamp is not None: # Recompute dx, underestimate by dividing by 1.5 so later trimming doesn't reduce desired total projbounds = bbPolyproj.bounds dx = np.round( np.sqrt(((projbounds[2] - projbounds[0]) * (projbounds[3] - projbounds[1])) / (Nsamp)) / 1.5) #get the "yes" sample points yespoints, nrows, ncols, xvar, yvar, yesidx = getYesPoints( pshapes, proj, dx, nmax=nmax, touch_center=touch_center) # sampleNo but with taking all of the points instead of just some of them randomly allidx = np.arange(0, len(xvar) * len(yvar)) # flattened array of all indices in mesh noidx = np.setxor1d(allidx, yesidx) # allidx - avoididx rowidx, colidx = np.unravel_index(noidx, (len(yvar), len(xvar))) nopoints = [] for row, col in zip(rowidx, colidx): xp = xvar[col] yp = yvar[row] nopoints.append((xp, yp)) nopoints = np.array(nopoints) # Only accept points inside the bounds bbPath = mplPath.Path( (list(zip(*np.array(bbPolyproj.exterior.coords.xy))))) yespoints = yespoints[bbPath.contains_points(yespoints)] nopoints = nopoints[bbPath.contains_points(nopoints)] totalpoints = (len(nopoints) + len(yespoints)) if Nsamp is not None and totalpoints > Nsamp: ratioyes = float(len(yespoints)) / totalpoints keepy = np.round(ratioyes * Nsamp) indy = np.random.randint(0, len(yespoints), size=keepy) indn = np.random.randint(0, len(nopoints), size=Nsamp - keepy) yespoints = yespoints[indy, :] nopoints = nopoints[indn, :] elif totalpoints < Nsamp: print(( 'Only collected %1.0f points out of desired %1.0f points due to bound restrictions' % (totalpoints, Nsamp))) #project all of the point data sets back to lat/lon yespoints = projectBack(yespoints, proj) nopoints = projectBack(nopoints, proj) return (yespoints, nopoints, xvar, yvar, pshapes, proj)
import numpy as np

try:
    array = np.array([0, 10, 20, 40, 60, 80])
    print(array)
    array1 = np.array([10, 30, 40, 50, 70, 90])
    print(array1)
    array3 = np.setxor1d(array, array1)
    print("The array obtained after exclusive or operation is: ", array3)
except:
    print("Syntax error")
def plot_heavy_bars(ax, h_attr_arr, h_clients_arr, nh_attr_arr, nh_clients_arr, xlabel, xulim, yulim, title, width=0.4, xllim=0, yllim=0, conv_xaxis=False, pct_label=True, xlabel_ra=False): """ Given an axis, list of 12 arrays and graph variables, prepare a bar chart graph. Params: ax (axis): axis for the plot h_attr_arr (array): heavy attribute array h_clients_arr (array): heavy number of clients array nh_attr_arr (array): non-heavy attribute array nh_clients_arr (array): non-heavy number of clients array xlabel (string): x-axis label xulim (number): upper limit for x-axis yulim (number): upper limit for y-axis title (string): title for the plot width (number): width of bars (default 0.4) xllim (number): lower limit for x-axis (default 0) yllim (number): lower limit for y-axis (default 0) conv_xaxis (boolean): convert the x-axis arrays to number (default False) pct_label (boolean): add the percent label on top of the bar (default True) xlabel_ra (boolean): align the xlabel to the right, for long labels (default False) """ if (conv_xaxis): xllim = 1 if (len(h_attr_arr) != len(nh_attr_arr)): print 'Mismatch in xarray lengths!' extras = np.setxor1d(h_attr_arr, nh_attr_arr) for item in extras: print item if item in nh_attr_arr: # extra item in non-heavy array nhidx = nh_attr_arr.tolist().index(item) print 'Delete ' + item + ' from non-heavy array' nh_attr_arr = np.delete(nh_attr_arr, nhidx) nh_clients_arr = np.delete(nh_clients_arr, nhidx) else: # extra item in heavy array hidx = h_attr_arr.tolist().index(item) print('Insert ' + item + ' into non-heavy array') nh_attr_arr = np.insert(nh_attr_arr, hidx, item) nh_clients_arr = np.insert(nh_clients_arr, hidx, 0) xticks = array(list(range(1, len(h_attr_arr) + 1))) ax.set_xticks(xticks) if (xlabel_ra): ax.set_xticklabels(h_attr_arr, rotation=45, ha='right') else: ax.set_xticklabels(h_attr_arr, rotation=45) h_attr_arr = xticks nh_attr_arr = xticks # the bar chart nhbars = ax.bar(nh_attr_arr - width / 2, nh_clients_arr, width, label='Non Heavy', color='lightsalmon') hbars = ax.bar(h_attr_arr + width / 2, h_clients_arr, width, label='Heavy', color='mediumaquamarine') ax.set_xlabel(xlabel) ax.set_xlim(xllim - 1, xulim) ax.set_ylabel('Number of Clients') ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5) ax.set_ylim(yllim, yulim) ax.set_title(title) ax.legend(loc='best') yrange = yulim - yllim if (pct_label): # Add percent labels above the heavy bars for hrect, nhrect in zip(hbars, nhbars): xloc = hrect.get_x() if (xloc >= xllim and xloc <= (xulim - 1)): hheight = hrect.get_height() nhheight = nhrect.get_height() htext = hheight * 1.0 / (hheight + nhheight) * 100 ax.text(xloc + hrect.get_width() / 2.0, hheight + .01 * yrange, '{0:,.2f}%'.format(htext), ha='center', va='bottom', rotation='vertical', fontsize=8) return None
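# A compact sketch (synthetic data) of the alignment step in plot_heavy_bars above:
# setxor1d finds category labels present in only one of the two attribute arrays, and
# the shorter array gets a zero count inserted for each of them. Only the
# "extra item in heavy array" branch is mirrored here.
import numpy as np

h_attr = np.array(['a', 'b', 'c', 'd'])
h_cnt = np.array([5, 2, 7, 1])
nh_attr = np.array(['a', 'c', 'd'])
nh_cnt = np.array([3, 4, 6])

for item in np.setxor1d(h_attr, nh_attr):      # labels missing on one side
    if item not in nh_attr:
        pos = h_attr.tolist().index(item)
        nh_attr = np.insert(nh_attr, pos, item)
        nh_cnt = np.insert(nh_cnt, pos, 0)
print(nh_attr, nh_cnt)                         # ['a' 'b' 'c' 'd'] [3 0 4 6]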