def findCoords(gs, candidates=None):
    if candidates == None:
        candidates = []
        # List all the possible z-level (heights)
        zRange = list(takewhile(lambda x : x < gs.boardSize[2], \
                      sort(unique(flatten(gs.heightMap())))))
        if zRange == []:
            print "Board is full, cannot find legal coordinates !"
            return None
    else:
        zRange = sort(unique(map(third, candidates)))
    # Do we have a choice on the z-level ?
    if len(zRange) == 1:
        z = zRange[0]
    else:
        print "\n", gs.boardToASCII(markedCubes=candidates)
        # Discard the z height max
        if zRange[-1] == gs.boardSize[2]:
            zRange = zRange[:-1]
        z = -1+input("Which z-level ? (%d-%d)\n> " \
                     % (zRange[0]+1, zRange[-1]+1))
    candidates = filter(lambda c: c[2] == z, candidates)
    if len(candidates) > 1:
        # Display the z-level with xy coordinates as letter-number
        print ' '+''.join(chr(97+x) for x in xrange(gs.boardSize[0]))
        print ' +'+'-'*gs.boardSize[0]
        lines = gs.boardToASCII(zRange=[z], markedCubes=candidates)\
                .split('\n')
        for y in xrange(gs.boardSize[1]):
            print '%s |%s' % (str(y+1).zfill(2), lines[y])
        print "\n"
        xy = raw_input("Which xy coordinates ?\n> ")
        return array([ord(xy[0])-97, int(xy[1:])-1, z])
    else:
        return candidates[0]
def test_unique_axis_zeros(self):
    # issue 15559
    single_zero = np.empty(shape=(2, 0), dtype=np.int8)
    uniq, idx, inv, cnt = unique(single_zero, axis=0, return_index=True,
                                 return_inverse=True, return_counts=True)

    # there's 1 element of shape (0,) along axis 0
    assert_equal(uniq.dtype, single_zero.dtype)
    assert_array_equal(uniq, np.empty(shape=(1, 0)))
    assert_array_equal(idx, np.array([0]))
    assert_array_equal(inv, np.array([0, 0]))
    assert_array_equal(cnt, np.array([2]))

    # there's 0 elements of shape (2,) along axis 1
    uniq, idx, inv, cnt = unique(single_zero, axis=1, return_index=True,
                                 return_inverse=True, return_counts=True)

    assert_equal(uniq.dtype, single_zero.dtype)
    assert_array_equal(uniq, np.empty(shape=(2, 0)))
    assert_array_equal(idx, np.array([]))
    assert_array_equal(inv, np.array([]))
    assert_array_equal(cnt, np.array([]))

    # test a "complicated" shape
    shape = (0, 2, 0, 3, 0, 4, 0)
    multiple_zeros = np.empty(shape=shape)
    for axis in range(len(shape)):
        expected_shape = list(shape)
        if shape[axis] == 0:
            expected_shape[axis] = 0
        else:
            expected_shape[axis] = 1

        assert_array_equal(unique(multiple_zeros, axis=axis),
                           np.empty(shape=expected_shape))
def findMove(gs, askApply=True):
    moves = gs.legalMoves()
    if len(moves) == 1:
        print "Only one move possible :\n", moveToASCII(moves[0])
    else:
        ok = False
        while not ok:
            # First, pick a block
            blkId = findBlock(gs, candidates=unique(map(snd, moves)))
            assert blkId != None  # since we checked that len(lm) was > 0
            # Filter the moves that have the selected block id
            moves = filter(lambda m: m[1] == blkId, moves)
            # Then, find the coordinates on the board
            coords = findCoords(gs, candidates=unik(map(fst, moves)))
            # Filter the moves that have the selected coordinates
            moves = filter(lambda m: (m[0] == coords).all(), moves)
            # Finally, find its variation
            blkVarId = findVariation(gs, blkId, \
                                     candidates=unique(map(third, moves)))
            move = (coords, blkId, blkVarId)
            print "You have selected :\n", moveToASCII(moves[0])
            print "Is this the move you wanted ? [Y/n]"
            if raw_input("") in ["n", "N"]:
                # Will start again with all legal moves possibles
                moves = gs.legalMoves()
            else:
                ok = True
    if askApply:
        print "Do you want to play this move over the current gamestate ?",\
              " [Y/n]"
        if raw_input("") not in ["n", "N"]:
            gs.playMove(move)
    return move
def __init__(self, string):
    #TODO: its_terminal
    from MyModule.funcs import its_terminal, its_variable

    # all lower letters are terminal
    self.terminals = [c for c in string if c.islower()]
    self.terminals = list(unique(self.terminals))

    # all upper letters are variable
    self.variables = [c for c in string if c.isupper()]
    self.variables = list(unique(self.variables))

    self.form = string
def __init__(self, string):
    # split the string on '->' and create a wing for each side
    left_side, right_side = string.split("->")
    self.left_wing = Wing(left_side)
    self.right_wing = Wing(right_side)

    # find unique terminals in both wings
    self.terminals = list(
        unique(self.right_wing.terminals + self.left_wing.terminals))

    # find unique variables in both wings
    self.variables = list(
        unique(self.right_wing.variables + self.left_wing.variables))

    self.form = str(self)
def print_unique_counts(d):
    column_list = d.columns.tolist()
    print "number of rows: {}".format(len(d[column_list[0]]))
    print ""
    for c in column_list:
        print "number of unique {}: {}".format(c, len(arraysetops.unique(d[c])))
def uniqueIdx(L):
    """
    Find indexes of unique elements in L based on their string
    representation (works both for cubes and blocks)
    """
    return list(snd(unique([str(x) for x in L], return_index=True)))
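# A minimal, self-contained sketch of what uniqueIdx does (an assumption: here
# `unique` is numpy's unique and snd(t) returns t[1], as elsewhere in this code).
# np.unique(..., return_index=True) yields the first-occurrence index of each
# distinct stringified element.
import numpy as np

cubes = [np.array([0, 0, 1]), np.array([0, 0, 1]), np.array([1, 2, 3])]
_, first_idx = np.unique([str(x) for x in cubes], return_index=True)
print(list(first_idx))  # -> [0, 2]: one index per distinct cube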
def __init__(self, name, mesh, dof, value):
    self.name = name
    self.tag = mesh.field_data[name][0]
    self.dim = mesh.field_data[name][1]
    self.local_dof = np.asarray(dof)
    self.value = value
    if self.dim == 0:
        # array containing indices of elements in the boundary
        self.elements = np.nonzero(
            mesh.cell_data_dict["gmsh:physical"]["vertex"] == self.tag)[0]
        # array containing indices of nodes in the boundary
        self.nodes = unique(mesh.cells_dict["vertex"][self.elements])
    elif self.dim == 1:
        self.elements = np.nonzero(
            mesh.cell_data_dict["gmsh:physical"]["line"] == self.tag)[0]
        self.nodes = unique(mesh.cells_dict["line"][self.elements])
def s_test_function(
    bin_gdf: GeoDataFrame,
    t_yrs: float,
    n_iters: int,
    likelihood_fn: str,
    prospective: bool = False,
    critical_pct: float = 0.25,
    not_modeled_likelihood: float = 0.0,
    append_results: bool = False,
):
    N_obs = len(get_total_obs_eqs(bin_gdf, prospective=prospective))
    N_pred = get_model_annual_eq_rate(bin_gdf) * t_yrs
    N_norm = N_obs / N_pred

    bin_like_cfg = {
        "investigation_time": t_yrs,
        "likelihood_fn": likelihood_fn,
        "not_modeled_likelihood": not_modeled_likelihood,
        "n_iters": n_iters,
    }
    bin_likes = s_test_gdf_series(bin_gdf, bin_like_cfg, N_norm)

    obs_likes = np.array([bl[0] for bl in bin_likes])
    stoch_likes = np.vstack([bl[1] for bl in bin_likes]).T
    bad_bins = list(unique(list(chain(*[bl[2] for bl in bin_likes]))))

    obs_like_total = sum(obs_likes)
    stoch_like_totals = np.sum(stoch_likes, axis=1)

    if append_results:
        bin_pcts = []
        for i, obs_like in enumerate(obs_likes):
            stoch_like = stoch_likes[:, i]
            bin_pct = len(stoch_like[stoch_like <= obs_like]) / n_iters
            bin_pcts.append(bin_pct)

        bin_gdf["S_bin_pct"] = bin_pcts

        bin_gdf["N_model"] = bin_gdf.SpacemagBin.apply(
            lambda x: get_n_eqs_from_mfd(x.get_rupture_mfd()) * t_yrs)

        bin_gdf["N_obs"] = bin_gdf.SpacemagBin.apply(
            lambda x: get_n_eqs_from_mfd(x.observed_earthquakes))

    pctile = (len(stoch_like_totals[stoch_like_totals <= obs_like_total])
              / n_iters)

    test_pass = True if pctile >= critical_pct else False
    test_res = "Pass" if test_pass else "Fail"

    test_result = {
        "critical_pct": critical_pct,
        "percentile": pctile,
        "test_pass": bool(test_pass),
        "test_res": test_res,
        "bad_bins": bad_bins,
    }

    return test_result
def calc_gini_group_score(group):
    unique_labels = unique(group)
    size = len(group)
    scores = 0.0
    if size == 0:
        return 0
    for label in unique_labels:
        p = 0.0
        label_cnt = len(group[group == label])
        p = label_cnt / size
        scores += p * p
    return 1 - scores
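# A hypothetical worked example for calc_gini_group_score (assumes `unique` is
# numpy's unique and that `group` is a numpy array so boolean masking works).
# A 50/50 split gives 1 - (0.5**2 + 0.5**2) = 0.5; a pure group gives 0.0.
import numpy as np
unique = np.unique  # assumption: the `unique` used above is numpy's unique

mixed_group = np.array([0, 0, 1, 1])
pure_group = np.array([1, 1, 1, 1])
print(calc_gini_group_score(mixed_group))  # -> 0.5
print(calc_gini_group_score(pure_group))   # -> 0.0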
def test_unique_axis(self):
    types = []
    types.extend(np.typecodes['AllInteger'])
    types.extend(np.typecodes['AllFloat'])
    types.append('datetime64[D]')
    types.append('timedelta64[D]')
    types.append([('a', int), ('b', int)])
    types.append([('a', int), ('b', float)])

    for dtype in types:
        self._run_axis_tests(dtype)

    msg = 'Non-bitwise-equal booleans test failed'
    data = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool)
    result = np.array([[False, True], [True, True]], dtype=bool)
    assert_array_equal(unique(data, axis=0), result, msg)

    msg = 'Negative zero equality test failed'
    data = np.array([[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]])
    result = np.array([[-0.0, 0.0]])
    assert_array_equal(unique(data, axis=0), result, msg)
def check_all(a, b, i1, i2, c, dt):
    base_msg = "check {0} failed for type {1}"

    msg = base_msg.format("values", dt)
    v = unique(a)
    assert_array_equal(v, b, msg)

    msg = base_msg.format("return_index", dt)
    v, j = unique(a, True, False, False)
    assert_array_equal(v, b, msg)
    assert_array_equal(j, i1, msg)

    msg = base_msg.format("return_inverse", dt)
    v, j = unique(a, False, True, False)
    assert_array_equal(v, b, msg)
    assert_array_equal(j, i2, msg)

    msg = base_msg.format("return_counts", dt)
    v, j = unique(a, False, False, True)
    assert_array_equal(v, b, msg)
    assert_array_equal(j, c, msg)

    msg = base_msg.format("return_index and return_inverse", dt)
    v, j1, j2 = unique(a, True, True, False)
    assert_array_equal(v, b, msg)
    assert_array_equal(j1, i1, msg)
    assert_array_equal(j2, i2, msg)

    msg = base_msg.format("return_index and return_counts", dt)
    v, j1, j2 = unique(a, True, False, True)
    assert_array_equal(v, b, msg)
    assert_array_equal(j1, i1, msg)
    assert_array_equal(j2, c, msg)

    msg = base_msg.format("return_inverse and return_counts", dt)
    v, j1, j2 = unique(a, False, True, True)
    assert_array_equal(v, b, msg)
    assert_array_equal(j1, i2, msg)
    assert_array_equal(j2, c, msg)

    msg = base_msg.format(
        ("return_index, return_inverse "
         "and return_counts"), dt
    )
    v, j1, j2, j3 = unique(a, True, True, True)
    assert_array_equal(v, b, msg)
    assert_array_equal(j1, i1, msg)
    assert_array_equal(j2, i2, msg)
    assert_array_equal(j3, c, msg)
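# A small self-contained illustration of the three optional returns exercised
# by check_all above, using only numpy's public np.unique API.
import numpy as np

a = np.array([5, 7, 1, 2, 1, 5, 7])
values, index, inverse, counts = np.unique(
    a, return_index=True, return_inverse=True, return_counts=True)
print(values)           # [1 2 5 7]
print(index)            # [2 3 0 1]  first occurrence of each unique value
print(values[inverse])  # reconstructs `a` itself
print(counts)           # [2 1 2 2]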
def __init__(self, file_path):
    matlab_data = loadmat(file_path)

    self.train_data = matlab_data['dataset'][0][0][0][0][0][0]
    self.train_data_labels = matlab_data['dataset'][0][0][0][0][0][1].flatten()
    self.train_data_labels = self.train_data_labels - np.min(
        self.train_data_labels)
    self.num_classes = len(unique(self.train_data_labels))
    self.data_dim = self.train_data.shape[1]

    self.test_data = matlab_data['dataset'][0][0][1][0][0][0]
    self.test_data_labels = matlab_data['dataset'][0][0][1][0][0][1].flatten()
    self.test_data_labels = self.test_data_labels - np.min(
        self.test_data_labels)
def check_all(a, b, i1, i2, c, dt):
    base_msg = 'check {0} failed for type {1}'

    msg = base_msg.format('values', dt)
    v = unique(a)
    assert_array_equal(v, b, msg)

    msg = base_msg.format('return_index', dt)
    v, j = unique(a, 1, 0, 0)
    assert_array_equal(v, b, msg)
    assert_array_equal(j, i1, msg)

    msg = base_msg.format('return_inverse', dt)
    v, j = unique(a, 0, 1, 0)
    assert_array_equal(v, b, msg)
    assert_array_equal(j, i2, msg)

    msg = base_msg.format('return_counts', dt)
    v, j = unique(a, 0, 0, 1)
    assert_array_equal(v, b, msg)
    assert_array_equal(j, c, msg)

    msg = base_msg.format('return_index and return_inverse', dt)
    v, j1, j2 = unique(a, 1, 1, 0)
    assert_array_equal(v, b, msg)
    assert_array_equal(j1, i1, msg)
    assert_array_equal(j2, i2, msg)

    msg = base_msg.format('return_index and return_counts', dt)
    v, j1, j2 = unique(a, 1, 0, 1)
    assert_array_equal(v, b, msg)
    assert_array_equal(j1, i1, msg)
    assert_array_equal(j2, c, msg)

    msg = base_msg.format('return_inverse and return_counts', dt)
    v, j1, j2 = unique(a, 0, 1, 1)
    assert_array_equal(v, b, msg)
    assert_array_equal(j1, i2, msg)
    assert_array_equal(j2, c, msg)

    msg = base_msg.format(('return_index, return_inverse '
                           'and return_counts'), dt)
    v, j1, j2, j3 = unique(a, 1, 1, 1)
    assert_array_equal(v, b, msg)
    assert_array_equal(j1, i1, msg)
    assert_array_equal(j2, i2, msg)
    assert_array_equal(j3, c, msg)
def check_all(a, b, i1, i2, c, dt):
    base_msg = 'check {0} failed for type {1}'

    msg = base_msg.format('values', dt)
    v = unique(a)
    assert_array_equal(v, b, msg)

    msg = base_msg.format('return_index', dt)
    v, j = unique(a, True, False, False)
    assert_array_equal(v, b, msg)
    assert_array_equal(j, i1, msg)

    msg = base_msg.format('return_inverse', dt)
    v, j = unique(a, False, True, False)
    assert_array_equal(v, b, msg)
    assert_array_equal(j, i2, msg)

    msg = base_msg.format('return_counts', dt)
    v, j = unique(a, False, False, True)
    assert_array_equal(v, b, msg)
    assert_array_equal(j, c, msg)

    msg = base_msg.format('return_index and return_inverse', dt)
    v, j1, j2 = unique(a, True, True, False)
    assert_array_equal(v, b, msg)
    assert_array_equal(j1, i1, msg)
    assert_array_equal(j2, i2, msg)

    msg = base_msg.format('return_index and return_counts', dt)
    v, j1, j2 = unique(a, True, False, True)
    assert_array_equal(v, b, msg)
    assert_array_equal(j1, i1, msg)
    assert_array_equal(j2, c, msg)

    msg = base_msg.format('return_inverse and return_counts', dt)
    v, j1, j2 = unique(a, False, True, True)
    assert_array_equal(v, b, msg)
    assert_array_equal(j1, i2, msg)
    assert_array_equal(j2, c, msg)

    msg = base_msg.format(('return_index, return_inverse '
                           'and return_counts'), dt)
    v, j1, j2, j3 = unique(a, True, True, True)
    assert_array_equal(v, b, msg)
    assert_array_equal(j1, i1, msg)
    assert_array_equal(j2, i2, msg)
    assert_array_equal(j3, c, msg)
def check_array_type(X, y, type):
    if y is not None:
        check_X_y(X, y)
        if type in ['cd', 'd']:
            yu, yc = at.unique(y, return_counts=True)
            if sum(yc) != len(y):
                np.testing.assert_equal(y.dtype, np.int)
            elif y.dtype != np.int:
                y = np.asarray(y, np.int)
        if type == 'c':
            np.testing.assert_equal(y.dtype, np.float)
        if type in ['c', 'd']:
            X = np.hstack((X, y.reshape((len(y), 1))))
            y = None
    else:
        check_array(X)
        if type == 'c':
            np.testing.assert_equal(X.dtype, np.float)
        if type == 'd':
            np.testing.assert_equal(X.dtype, np.int)
        if type == 'cd':
            raise ValueError("y has to be defined for type cd")
    return X, y
def plot_points(
    ax: Axes,
    x: Union[torch.Tensor, np.ndarray],
    y: Union[torch.Tensor, np.ndarray],
    cmap: List[Tuple],
):
    for i_c, c in enumerate(unique(y)):
        ax.plot(
            x[:, 0][y == c],
            x[:, 1][y == c],
            "o",
            markersize=3.5,
            markerfacecolor=(*cmap[i_c], 0.95),
            markeredgewidth=1.2,
            markeredgecolor=(*colours_rgb["white"], 0.5),
            label=f"Class {c}",
        )
    return ax
def ConvertToContinue(c, sigma=0.01):
    '''
    Convert a discrete variable into a continuous variable by applying a
    Gaussian distribution at each point

    Parameters
    ----------
    c = discrete variable
    sigma = standard deviation of the Gaussian distribution

    Returns
    -------
    newc = continuous variable
    '''
    cu = at.unique(c)
    newc = c.copy()
    newc = newc.astype(float)
    for cui in cu:
        ind = np.where(c == cui)[0]
        newc[ind] = sigma * np.random.randn(len(ind)) + cui
    #MP.plot(c, '.')
    #MP.plot(newc, 'r+')
    #MP.ylim(cu[0]-0.5, cu[-1]+05)
    return newc
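# A hedged usage sketch for ConvertToContinue: jittering a discrete variable so
# repeated values become distinguishable (e.g. before a scatter plot). It
# assumes `at` is numpy.lib.arraysetops, as the at.unique call above implies.
import numpy as np
from numpy.lib import arraysetops as at  # assumption about the `at` alias

c = np.array([0, 0, 1, 1, 2, 2])
newc = ConvertToContinue(c, sigma=0.05)
# Each entry of newc is its original level plus N(0, 0.05**2) noise, so the
# group means stay near 0, 1 and 2 while ties are broken.
print(newc)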
def find_key_length(freq, attempt):
    possible_keys = []
    # the best config so far - about 85% accuracy
    if attempt == 1:
        betapeaks, _ = find_peaks(freq, height=17, distance=4, prominence=17)
    else:
        betapeaks, _ = find_peaks(freq, height=17, distance=4, prominence=13)
    possible_keys.append([j-i for i, j in zip(betapeaks[:-1], betapeaks[1:])])

    # Filter out lengths of occurrence diffs (possible key lengths less than 6 and greater than 24)
    for i in possible_keys[0]:
        if i < 6 or i > 24:
            possible_keys[0] = list(filter((i).__ne__, possible_keys[0]))

    # print("Possible Key Length:", possible_keys)
    # print("Ciphertext Length Guess:", statistics.multimode(possible_keys[0]))

    # Get number of results from guess
    dupPossibleKeys = unique(possible_keys[0])
    # print("De-duplicated Keys:", dupPossibleKeys)
    # print("Identified Key Lengths:", len(dupPossibleKeys))

    return possible_keys
def _run_axis_tests(self, dtype):
    data = np.array([[0, 1, 0, 0],
                     [1, 0, 0, 0],
                     [0, 1, 0, 0],
                     [1, 0, 0, 0]]).astype(dtype)

    msg = 'Unique with 1d array and axis=0 failed'
    result = np.array([0, 1])
    assert_array_equal(unique(data), result.astype(dtype), msg)

    msg = 'Unique with 2d array and axis=0 failed'
    result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]])
    assert_array_equal(unique(data, axis=0), result.astype(dtype), msg)

    msg = 'Unique with 2d array and axis=1 failed'
    result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]])
    assert_array_equal(unique(data, axis=1), result.astype(dtype), msg)

    msg = 'Unique with 3d array and axis=2 failed'
    data3d = np.array([[[1, 1], [1, 0]],
                       [[0, 1], [0, 0]]]).astype(dtype)
    result = np.take(data3d, [1, 0], axis=2)
    assert_array_equal(unique(data3d, axis=2), result, msg)

    uniq, idx, inv, cnt = unique(data, axis=0, return_index=True,
                                 return_inverse=True, return_counts=True)
    msg = "Unique's return_index=True failed with axis=0"
    assert_array_equal(data[idx], uniq, msg)
    msg = "Unique's return_inverse=True failed with axis=0"
    assert_array_equal(uniq[inv], data)
    msg = "Unique's return_counts=True failed with axis=0"
    assert_array_equal(cnt, np.array([2, 2]), msg)

    uniq, idx, inv, cnt = unique(data, axis=1, return_index=True,
                                 return_inverse=True, return_counts=True)
    msg = "Unique's return_index=True failed with axis=1"
    assert_array_equal(data[:, idx], uniq)
    msg = "Unique's return_inverse=True failed with axis=1"
    assert_array_equal(uniq[:, inv], data)
    msg = "Unique's return_counts=True failed with axis=1"
    assert_array_equal(cnt, np.array([2, 1, 1]), msg)
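# A self-contained sketch of the axis semantics exercised above: axis=0
# collapses duplicate rows, axis=1 collapses duplicate columns.
import numpy as np

data = np.array([[0, 1, 0, 0],
                 [1, 0, 0, 0],
                 [0, 1, 0, 0],
                 [1, 0, 0, 0]])
print(np.unique(data, axis=0))  # the two distinct rows
print(np.unique(data, axis=1))  # the three distinct columns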
def _run_axis_tests(self, dtype):
    data = np.array([[0, 1, 0, 0],
                     [1, 0, 0, 0],
                     [0, 1, 0, 0],
                     [1, 0, 0, 0]]).astype(dtype)

    msg = 'Unique with 1d array and axis=0 failed'
    result = np.array([0, 1])
    assert_array_equal(unique(data), result.astype(dtype), msg)

    msg = 'Unique with 2d array and axis=0 failed'
    result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]])
    assert_array_equal(unique(data, axis=0), result.astype(dtype), msg)

    msg = 'Unique with 2d array and axis=1 failed'
    result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]])
    assert_array_equal(unique(data, axis=1), result.astype(dtype), msg)

    msg = 'Unique with 3d array and axis=2 failed'
    data3d = np.dstack([data] * 3)
    result = data3d[..., :1]
    assert_array_equal(unique(data3d, axis=2), result, msg)

    uniq, idx, inv, cnt = unique(data, axis=0, return_index=True,
                                 return_inverse=True, return_counts=True)
    msg = "Unique's return_index=True failed with axis=0"
    assert_array_equal(data[idx], uniq, msg)
    msg = "Unique's return_inverse=True failed with axis=0"
    assert_array_equal(uniq[inv], data)
    msg = "Unique's return_counts=True failed with axis=0"
    assert_array_equal(cnt, np.array([2, 2]), msg)

    uniq, idx, inv, cnt = unique(data, axis=1, return_index=True,
                                 return_inverse=True, return_counts=True)
    msg = "Unique's return_index=True failed with axis=1"
    assert_array_equal(data[:, idx], uniq)
    msg = "Unique's return_inverse=True failed with axis=1"
    assert_array_equal(uniq[:, inv], data)
    msg = "Unique's return_counts=True failed with axis=1"
    assert_array_equal(cnt, np.array([2, 1, 1]), msg)
def MI_RenyiCC_Multi(X, y=None, k=0, type='c', njobs=4):
    """
    Mutual Information estimator based on the Renyi quadratic entropy and the
    Cauchy-Schwarz divergence

    Parameters
    ----------
    X = data of shape [n_samples, n_features]
    type = type of the computation according to the variable types: 'd' for
        discrete variables, 'c' for continuous variables (by default) and
        'cd' for estimating MI of continuous variables with a discrete
        target y
    y = discrete target (for classification study), array of shape (n_samples,)
    k = the number of neighbors considered for the Parzen window estimation
        (0 by default means that all the samples are considered)
    njobs = number of parallel jobs for computation (4 by default)

    Returns
    -------
    MI_QRCS = Mutual Information score, i.e. equal to 0 if the variables in X
        are independent
    """
    N = X.shape[0]
    X, y = check_array_type(X, y, type)
    if type == 'd':
        u = np.array([at.unique(x, return_counts=True) for x in X.T])
        freqs = DiscDensity(zip(*X.T), N)
        hr2c = Sum_Dot_Vect(u[:, 1], N)
        hr2 = Parallel(n_jobs=njobs, backend="threading")(
            delayed(Parallel_MI_RenyiCC_d_Multi)(i, freqs[i], u, N)
            for i in freqs)
        s = np.sum(np.array(hr2), 0)
        hr2a = s[0]
        hr2b = s[1]
        #print("hr2a:",hr2a,"-hr2b:",hr2b,"-hr2c:",hr2c)
    elif type == 'c':
        neigh = KNearestNeighbors(X, k)
        iqrx = [np.subtract(*np.percentile(x, [75, 25])) for x in X.T]
        varx = [np.var(x) for x in X.T]
        h = 0.85 * min(1 / np.sqrt(np.mean(varx)), np.mean(iqrx)) * N**(-1 / 6)
        #hr2 = Parallel(n_jobs=njobs, backend="threading")(delayed(Parallel_MI_RenyiCC_c_Multi)(i, X, h) for i in zip(*X.T))
        hr2 = Parallel(n_jobs=njobs, backend="threading")(
            delayed(Parallel_MI_RenyiCC_c_Multi)(i, X[knn(i, neigh), :], h)
            for i in zip(*X.T))
        s = np.sum(np.array(hr2), 0)
        hr2a = s[0]
        hr2b = s[1]
        pw = [s[i] for i in range(2, len(s))]
        hr2c = (1 / N**4) * reduce(mul, pw)
        hr2a = (1 / N**3) * hr2a
        hr2b = (1 / N**2) * hr2b
        #print("hr2a:",hr2a,"-hr2b:",hr2b,"-hr2c:",hr2c)
    elif type == "cd":
        yu, yc = at.unique(y, return_counts=True)
        #hr2y = -np.log(np.sum((yc/N)**2))
        if X.shape[1] == 1:
            hx = 0.9 * min(np.std(X), np.subtract(
                *np.percentile(X, [75, 25]))) * N**(-1 / 5)**2
        else:
            iqrx = [np.subtract(*np.percentile(x, [75, 25])) for x in X.T]
            varx = [np.var(x) for x in X.T]
            hx = 0.85 * min(1 / np.sqrt(np.mean(varx)),
                            np.mean(iqrx)) * N**(-1 / 6)
        xyu = defaultdict(list)
        z = zip(*np.hstack((X, np.reshape(y, (N, 1)))).T)
        for i in z:
            xyu[int(i[-1:][0])].append(i[:-1])
        neigh = KNearestNeighbors(X, k)
        hr2 = Parallel(n_jobs=njobs, backend="threading")(delayed(
            Parallel_MI_RenyiCC_cd_Multi)(np.array(xyu[yui]), X, hx, neigh, k)
            for yui in yu)
        s = np.sum(np.array(hr2), 0)
        hr2a = s[0]
        hr2b = s[1]
        nxyu = s[2]
        # Parallelize the loop according to the biggest dimension between the
        # number of samples and the number of features.
        # Notes: by using the knn, the two parallelized estimations are
        # different since the first computes the knn of the ith sample through
        # all the feature dimensions while the 2nd computes the knn of the ith
        # sample along each feature dimension
        if X.shape[0] > X.shape[1]:
            #hr2cp = Parallel(n_jobs=njobs, backend="threading")(delayed(Parallel_MI_RenyiCC_cd_Multi_hr2c_dim0)(i, X, hx) for i in range(N))
            hr2cp = Parallel(n_jobs=njobs, backend="threading")(
                delayed(Parallel_MI_RenyiCC_cd_Multi_hr2c_dim0)
                (i, X[knn(i, neigh), :], hx) for i in X)
            hr2cp = reduce(mul, np.sum(np.array(hr2cp), 0))
            hr2c = (1 / N**4) * nxyu * hr2cp
        else:
            hr2cp = Parallel(n_jobs=njobs, backend="threading")(
                delayed(Parallel_MI_RenyiCC_cd_Multi_hr2c_dim1)(
                    X[:, j], hx, neigh) for j in range(X.shape[1]))
            hr2cp = reduce(mul, hr2cp)
            hr2c = (1 / N**4) * nxyu * hr2cp
        #print("hr2a:",hr2a,"-hr2b:",hr2b,"-hr2c:",hr2c)
        hr2a = (1 / N**3) * hr2a
        hr2b = (1 / N**2) * hr2b
        #hr2x = -np.log((1/N**2)*hr2x)
    hr2a = max(10**(-100), hr2a)
    hr2b = max(10**(-100), hr2b)
    hr2c = max(10**(-100), hr2c)
    #print("hr2a:",hr2a,"-hr2b:",hr2b,"-hr2c:",hr2c)
    lhr2a = -np.log(hr2a)
    lhr2b = -np.log(hr2b)
    lhr2c = -np.log(hr2c)
    MI_QRCS = lhr2a - 0.5 * lhr2b - 0.5 * lhr2c
    return MI_QRCS
def test_unique_axis_list(self):
    msg = "Unique failed on list of lists"
    inp = [[0, 1, 0], [0, 1, 0]]
    inp_arr = np.asarray(inp)
    assert_array_equal(unique(inp, axis=0), unique(inp_arr, axis=0), msg)
    assert_array_equal(unique(inp, axis=1), unique(inp_arr, axis=1), msg)
def clustering_plot_and_metric(experiment_log: ExperimentLog, metric: str):
    assert metric in METRICS
    model = experiment_log.best_version.model

    # Get dm
    bs = 500
    experiment_name = experiment_log.experiment_name
    misc = experiment_log.best_version.misc
    dm = get_dm(experiment_name, misc, bs)

    # Get latent encodings
    z, y = [], []
    with torch.no_grad():
        for idx, batch in enumerate(iter(dm.test_dataloader())):
            x, _y = batch
            if isinstance(model, VanillaVAE):
                _, _, _z, _, _ = model._run_step(x)
            elif isinstance(model, V3AE):
                _, _, _, _, _, _z, _, _ = model._run_step(x)
                _z = _z[0]
            z.append(_z)
            y.append(_y)
    # [len_test_dataset, latent_size]
    z, y = torch.cat(z, dim=0), torch.cat(y)

    # Plots
    colour_names = ["pink", "navyBlue", "yellow"]
    cmap_light = ListedColormap(
        [(*colours_rgb[c_n], 0.4) for c_n in colour_names][: len(misc["digits"])]
    )
    cmap_dark = [colours_rgb[c_n] for c_n in colour_names][: len(misc["digits"])]
    x_mesh, y_mesh = (
        torch.linspace(z[:, 0].min() - 1, z[:, 0].max() + 1, steps=100),
        torch.linspace(z[:, 1].min() - 1, z[:, 1].max() + 1, steps=100),
    )
    x_mesh, y_mesh = torch.meshgrid(x_mesh, y_mesh)
    pos = torch.cat((x_mesh.reshape(-1, 1), y_mesh.reshape(-1, 1)), dim=1)

    # True with NN classifier
    clf = KNeighborsClassifier(n_neighbors=7)
    clf.fit(z, y)
    classes_mesh = clf.predict(pos).reshape(*x_mesh.shape)
    fig, ax = plt.subplots()
    ax.contourf(x_mesh, y_mesh, classes_mesh, cmap=cmap_light)
    ax = plot_points(ax, z, y, cmap_dark)

    # Kmeans
    predicted_classes, predicted_classes_mesh = np.zeros_like(y), np.zeros_like(
        classes_mesh
    )
    n_clusters = len(unique(y))
    if metric == EUCLIDEAN:
        kmeans = KMeans(n_clusters=n_clusters).fit(z)
        predicted_classes = kmeans.predict(z)
        predicted_classes_mesh = kmeans.predict(pos).reshape(*x_mesh.shape)
        print(f"[{metric}] F-Score: {f1_score(y, predicted_classes, average='micro')}")
    if metric == RIEMANNIAN:
        kmeans = RiemanninaKMeans(model, n_clusters=n_clusters).fit(z)
    fig, ax = plt.subplots()
    ax.contourf(x_mesh, y_mesh, predicted_classes_mesh, cmap=cmap_light)
    ax = plot_points(ax, z, predicted_classes, cmap_dark)

    plt.show()
def test_unique_1d(self):

    def check_all(a, b, i1, i2, c, dt):
        base_msg = 'check {0} failed for type {1}'

        msg = base_msg.format('values', dt)
        v = unique(a)
        assert_array_equal(v, b, msg)

        msg = base_msg.format('return_index', dt)
        v, j = unique(a, True, False, False)
        assert_array_equal(v, b, msg)
        assert_array_equal(j, i1, msg)

        msg = base_msg.format('return_inverse', dt)
        v, j = unique(a, False, True, False)
        assert_array_equal(v, b, msg)
        assert_array_equal(j, i2, msg)

        msg = base_msg.format('return_counts', dt)
        v, j = unique(a, False, False, True)
        assert_array_equal(v, b, msg)
        assert_array_equal(j, c, msg)

        msg = base_msg.format('return_index and return_inverse', dt)
        v, j1, j2 = unique(a, True, True, False)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i1, msg)
        assert_array_equal(j2, i2, msg)

        msg = base_msg.format('return_index and return_counts', dt)
        v, j1, j2 = unique(a, True, False, True)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i1, msg)
        assert_array_equal(j2, c, msg)

        msg = base_msg.format('return_inverse and return_counts', dt)
        v, j1, j2 = unique(a, False, True, True)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i2, msg)
        assert_array_equal(j2, c, msg)

        msg = base_msg.format(('return_index, return_inverse '
                               'and return_counts'), dt)
        v, j1, j2, j3 = unique(a, True, True, True)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i1, msg)
        assert_array_equal(j2, i2, msg)
        assert_array_equal(j3, c, msg)

    a = [5, 7, 1, 2, 1, 5, 7] * 10
    b = [1, 2, 5, 7]
    i1 = [2, 3, 0, 1]
    i2 = [2, 3, 0, 1, 0, 2, 3] * 10
    c = np.multiply([2, 1, 2, 2], 10)

    # test for numeric arrays
    types = []
    types.extend(np.typecodes['AllInteger'])
    types.extend(np.typecodes['AllFloat'])
    types.append('datetime64[D]')
    types.append('timedelta64[D]')
    for dt in types:
        aa = np.array(a, dt)
        bb = np.array(b, dt)
        check_all(aa, bb, i1, i2, c, dt)

    # test for object arrays
    dt = 'O'
    aa = np.empty(len(a), dt)
    aa[:] = a
    bb = np.empty(len(b), dt)
    bb[:] = b
    check_all(aa, bb, i1, i2, c, dt)

    # test for structured arrays
    dt = [('', 'i'), ('', 'i')]
    aa = np.array(list(zip(a, a)), dt)
    bb = np.array(list(zip(b, b)), dt)
    check_all(aa, bb, i1, i2, c, dt)

    # test for ticket #2799
    aa = [1. + 0.j, 1 - 1.j, 1]
    assert_array_equal(np.unique(aa), [1. - 1.j, 1. + 0.j])

    # test for ticket #4785
    a = [(1, 2), (1, 2), (2, 3)]
    unq = [1, 2, 3]
    inv = [0, 1, 0, 1, 1, 2]
    a1 = unique(a)
    assert_array_equal(a1, unq)
    a2, a2_inv = unique(a, return_inverse=True)
    assert_array_equal(a2, unq)
    assert_array_equal(a2_inv, inv)

    # test for chararrays with return_inverse (gh-5099)
    a = np.chararray(5)
    a[...] = ''
    a2, a2_inv = np.unique(a, return_inverse=True)
    assert_array_equal(a2_inv, np.zeros(5))

    # test for ticket #9137
    a = []
    a1_idx = np.unique(a, return_index=True)[1]
    a2_inv = np.unique(a, return_inverse=True)[1]
    a3_idx, a3_inv = np.unique(a, return_index=True,
                               return_inverse=True)[1:]
    assert_equal(a1_idx.dtype, np.intp)
    assert_equal(a2_inv.dtype, np.intp)
    assert_equal(a3_idx.dtype, np.intp)
    assert_equal(a3_inv.dtype, np.intp)
def fit(self, X, y,
        sample_mask=None, X_argsorted=None,
        check_input=True, sample_weight=None):
    # Deprecations
    if sample_mask is not None:
        warn("The sample_mask parameter is deprecated as of version 0.14 "
             "and will be removed in 0.16.",
             DeprecationWarning)

    # Convert data
    random_state = check_random_state(self.random_state)

    if check_input:
        X = check_array(X, dtype=DTYPE, accept_sparse="csc")
        if issparse(X):
            X.sort_indices()

            if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:
                raise ValueError("No support for np.int64 index based "
                                 "sparse matrices")

    # Determine output settings
    n_samples, self.n_features_ = X.shape
    is_classification = isinstance(self, ClassifierMixin)

    y = np.atleast_1d(y)

    if y.ndim == 1:
        # reshape is necessary to preserve the data contiguity against vs
        # [:, np.newaxis] that does not.
        y = np.reshape(y, (-1, 1))

    self.n_outputs_ = y.shape[1]

    if is_classification:
        y = np.copy(y)

        self.classes_ = []
        self.n_classes_ = []

        for k in six.moves.range(self.n_outputs_):
            classes_k, y[:, k] = unique(y[:, k], return_inverse=True)
            self.classes_.append(classes_k)
            self.n_classes_.append(classes_k.shape[0])
    else:
        self.classes_ = [None] * self.n_outputs_
        self.n_classes_ = [1] * self.n_outputs_

    self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)

    max_depth = 1
    max_features = 10

    if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
        y = np.ascontiguousarray(y, dtype=DOUBLE)

    if len(y) != n_samples:
        raise ValueError("Number of labels=%d does not match "
                         "number of samples=%d" % (len(y), n_samples))
    if self.min_samples_split <= 0:
        raise ValueError("min_samples_split must be greater than zero.")
    if self.min_samples_leaf <= 0:
        raise ValueError("min_samples_leaf must be greater than zero.")
    if max_depth <= 0:
        raise ValueError("max_depth must be greater than zero. ")

    if sample_weight is not None:
        if (getattr(sample_weight, "dtype", None) != DOUBLE or
                not sample_weight.flags.contiguous):
            sample_weight = np.ascontiguousarray(sample_weight, dtype=DOUBLE)
        if len(sample_weight.shape) > 1:
            raise ValueError("Sample weights array has more "
                             "than one dimension: %d" %
                             len(sample_weight.shape))
        if len(sample_weight) != n_samples:
            raise ValueError("Number of weights=%d does not match "
                             "number of samples=%d" %
                             (len(sample_weight), n_samples))

    if self.method == 'bp':
        self.tree_ = _fit_binary_decision_stump_breakpoint(
            X, y, sample_weight, X_argsorted, self.calculate_probabilites)
    elif self.method == 'bp_threaded':
        self.tree_ = _fit_binary_decision_stump_breakpoint_threaded(
            X, y, sample_weight, X_argsorted, self.calculate_probabilites)
    else:
        self.tree_ = _fit_binary_decision_stump_breakpoint(
            X, y, sample_weight, X_argsorted, self.calculate_probabilites)

    if self.n_outputs_ == 1:
        self.n_classes_ = self.n_classes_[0]
        self.classes_ = self.classes_[0]

    return self
def freqFinder(fileName, freq, ciphertext, multiDimArray, testNum):
    # ***************************************************************
    # test 1 & 2 common processing logic
    # ***************************************************************

    # ***************************************************************
    # Guess encryption key length using maximum coincidences method
    # ***************************************************************
    start = time.time()
    possible_keys = []
    possible_keys = find_key_length(freq, 1)
    print("Possible Key Length(s):", possible_keys)
    if len(possible_keys[0]) != 0:
        guessedKey = statistics.multimode(possible_keys[0])
    else:
        possible_keys = find_key_length(freq, 2)
        if len(possible_keys[0]) != 0:
            guessedKey = statistics.multimode(possible_keys[0])
            print("Guessed key length in attempt 2")
        else:
            print("ERROR: Could not guess key length - Exiting")
            exit(1)

    guessedKey = unique(possible_keys[0])
    print("Guessed Key Length(s):", guessedKey)

    # Loop through all guessed key lengths
    # save the %accuracy and corresponding decrypted cipher text for each key length
    # select the decrypted string with the highest accuracy
    # run the best decrypted string through fuzzer
    # generate final fuzzed output

    # ************************************** TO IMPROVE DECRYPTION ACCURACY ***************
    # decr_pt_map = {}
    # for gk in guessedKey:
    #     ### for each key select the other end for rand chars if accuracy < 50% & select the higher of the 2
    #     decr_pt_map = decrypt(gk)
    #     best_accuracy = max(decr_pt_map keys)
    #
    # ### if best_accuracy is still bad run through all key lengths from 1 to 24 not in guessedKey list
    # if best_accuracy is < 10%:
    #     for gk in range(1, 25):
    #         if gk in guessedKey:
    #             continue
    #         decr_pt_map = decrypt(gk)
    #         best_accuracy = max(decr_pt_map keys)
    # best_pt = decr_pt_map[best_accuracy]
    #
    # ### run best_pt through fuzzer to get final_pt
    # compute total runtime
    # log output
    # return run_time
    # ************************************** TO IMPROVE DECRYPTION ACCURACY ***************

    # define new func which takes a single keyLen as input & returns decr_pt_map
    # decr_pt_map uses %accuracy as key and decrypted string as value
    # def decrypt(keyLen)

    # Break out the cipherString into Key Length chunks for Index of Coincidence Calculations
    # sort the guessedKey array of possible lengths to use the smallest one, if multiple peaks found
    # guessedKey.sort()
    # tempGuessKey = keyLen

    best_tokenized_plaintext = ""
    max_accuracy = -1
    for gk in range(0, len(guessedKey)):
        tempGuessKey = guessedKey[gk]
        cipherDict = []
        for keyIndex in range(0, tempGuessKey):
            cipherStr = ""
            for y in range(keyIndex, len(ciphertext), tempGuessKey):
                cipherStr += ciphertext[y]
            cipherDict.append(cipherStr)

        distributionArray = []
        for i in range(tempGuessKey):
            distributionArray.append(get_distribution(cipherDict[i]))

        # Get a sum of total ciphertext dictionary character values
        tempCharSum = []
        for i in range(tempGuessKey):
            tempCharSum.append(sum(distributionArray[i]))

        # Cipher Text IOC
        cipherIndexOfCoincidence = []
        # Diff Plaintext IOC Array
        deltaPlaintextIndexOfCoincidence = []
        # initialize plaintext Dictionary
        plaintextDict = []

        # CipherText IOC Generator
        for i in range(len(tempCharSum)):
            ioc = 0
            for y in range(len(distributionArray[i])):
                ioc += (distributionArray[i][y] / tempCharSum[i])**2
            cipherIndexOfCoincidence.append(ioc)

        # ***************************************************************
        # test 1 processing logic
        # ***************************************************************
        if testNum == 1:
            # Plaintext IOC Generator
            plaintextDictFile = fileName
            #plaintextDictFile = fileName
            f = open(plaintextDictFile)
            plainTextlines = f.readlines()

            # Plaintext Dictionary Populator
            for y in range(len(plainTextlines)):
                if y % 2:
                    stripped = lambda s: "".join(i for i in s if (96 < ord(i) < 123) or ord(i) == 32)
                    plainTextlines[y] = stripped(plainTextlines[y])
                    plaintextDict.append(plainTextlines[y])

            # Declare temporary value and delta for difference in plaintext/ciphertext
            adjustedKeyLength = 0
            deltaMsgIocList = []
            plaintxtMin = 0

            # Iterate through strings in plaintext dictionary and build plaintext IOC
            # 5 plaintext line input loop through them. dependency: ciphertext IOC.
            for i in range(len(plaintextDict)):
                adjustedKeyLength = guessedKey[gk] - round((len(ciphertext) - len(plaintextDict[i])) * guessedKey[gk] / len(ciphertext))

                # Process IOC into Key Length chunks for Index of Coincidence Calculations
                plainIOCDict = []
                tempPlainMsg = plaintextDict[i]

                # Breaking down into groups of characters -
                for keyIndex in range(0, adjustedKeyLength):
                    plainIOCStr = ''
                    for y in range(keyIndex, len(tempPlainMsg), adjustedKeyLength):
                        plainIOCStr += tempPlainMsg[y]
                    plainIOCDict.append(plainIOCStr)

                # take first group of characters and place into distributionArray
                distributionArray = []
                for z in range(adjustedKeyLength):
                    #distributionArray.append(get_distribution(plainIOCDict[z]))
                    temp = get_distribution(plainIOCDict[z])
                    distributionArray.append(temp)

                # Get a sum of total plaintext dictionary character values in each segment/group of chars
                tempCharSum = []
                for w in range(adjustedKeyLength):
                    tempCharSum.append(sum(distributionArray[w]))

                # Plaintext IOC Array
                plaintextIndexOfCoincidence = []

                # Plaintext/CipherText IOC Generator
                for p in range(len(tempCharSum)):
                    ioc = 0
                    for y in range(len(distributionArray[p])):
                        # calculate IOC per char group
                        ioc += (distributionArray[p][y] / tempCharSum[p])**2
                    plaintextIndexOfCoincidence.append(ioc)

                # Have line and IOCs for one message
                deltaIOC = 0

                # Delta Calculation
                # of groups in plaintext index - for loop through 7 groups/bags. Compute delta
                for c in range(adjustedKeyLength):
                    # plaintxt = 7 bags.
                    deltaIOC += (cipherIndexOfCoincidence[c] - plaintextIndexOfCoincidence[c])**2
                deltaMsgIocList.append(deltaIOC)

            plaintxtMin = min(deltaMsgIocList)

            # do the decryption here
            res = [i for i, j in enumerate(deltaMsgIocList) if j == plaintxtMin]

            # output decrypted message
            print("Decrypted Plaintext for test-1 (deltaIoC technique): ", plaintextDict[res[0]])

        # ***************************************************************
        # test 2 processing logic
        # ***************************************************************
        elif testNum == 2:
            # enhanced bad bucket logic March 02 2021
            # 1. Calc IoC for the 400 word dict - expected to closely match any of the cipher buckets that are not random
            # 2. we already have IoC's for each of our cipher buckets (including rand buckets)
            # 3. we already know the number of rand chars per key length
            # 4. find the largest IoC differential between dictIoC and cipherIoC buckets
            # 5. mark the cipher buckets equaling the rand chars per key len that have the max IoC differential

            # find bad buckets - ciphertext chars that need to be dropped
            # find the number of random chars inserted per key length
            # if the first bucket is bad (low IoC) insert bucket numbers starting from 0
            # if the last bucket is bad (low IoC) insert bucket numbers starting from t-1
            badBucketlist = []
            adjustedKeyLength = guessedKey[gk] - round((len(ciphertext) - 500) * guessedKey[gk] / len(ciphertext))
            randchars = guessedKey[gk] - adjustedKeyLength

            # Uncomment next 6 lines for prev badBucketList strategy
            # if cipherIndexOfCoincidence[0] < cipherIndexOfCoincidence[guessedKey[gk] - 1]:
            #     for i in range(0, randchars):
            #         badBucketlist.insert(i, i)
            # else:
            #     for i in range(0, randchars):
            #         badBucketlist.insert(i, guessedKey[gk] - (1+i))

            tmpCipherIOC = []
            tmpCipherIOC = list(cipherIndexOfCoincidence)
            for r in range(0, randchars):
                min_idx = [i for i, j in enumerate(tmpCipherIOC) if j == min(tmpCipherIOC)]
                badBucketlist.insert(r, min_idx[0])
                tmpCipherIOC.insert(min_idx[0], 999.0)

            # ================================================================================
            # identifying bad buckets based on an absolute value of IoC - FAILURE RATE HIGH
            # for i in (0, 1, guessedKey[gk]-1, guessedKey[gk]-2):
            #     if cipherIndexOfCoincidence[i] < 0.06399:
            #         badBucketlist.append(i)
            # ================================================================================
            # find the bad bucket based on min(IoC) and add to bad bucket list - NOT WORKABLE
            # if len(badBucketlist) == 0:
            #     badBucketlist.append(cipherIndexOfCoincidence.index(min(cipherIndexOfCoincidence)))
            # ================================================================================

            print(f'Bad Buckets ({randchars} rand char(s) per key): Random chars at index: {badBucketlist} Cipher IoC: {cipherIndexOfCoincidence}')

            cleanCipherBuckets = []
            for i in range(0, guessedKey[gk]):
                if i not in badBucketlist:
                    cleanCipherBuckets.append(cipherDict[i])

            cleanCipherString = ""
            for j in range(0, len(cleanCipherBuckets[0])):
                for i in range(0, len(cleanCipherBuckets)):
                    if j >= len(cleanCipherBuckets[i]):
                        break
                    cleanCipherString += cleanCipherBuckets[i][j]

            decrypt_key = []
            curr_chi_squared = 0.0
            plaintextBuckets = []

            # chi-squared computation is performed for test 2 only - BEGIN
            # j loop is to iterate through each clean bucket
            # each clean bucket represents a string which is a mono-alphabetic shift
            # loop i, iterates through each char shift for each clean cipher bucket (string)
            # chi_squared is computed for each shifted string (total of 26 + original cipher str)
            # the min chi_squared across all shifts for a specific bucket is the most likely shift amount
            for j in range(0, len(cleanCipherBuckets)):
                # print("clean cipher bucket :[", j, "]: ", cleanCipherBuckets[j])
                min_chi_squared = 9999999.0
                for i in range(0, len(alphabet)):
                    shifted_cipher_str = ""
                    for c in cleanCipherBuckets[j]:
                        shifted_c = (alphabet_map[c] + i) % len(alphabet)
                        shifted_cipher_str += alphaDict[shifted_c]
                    curr_chi_squared = round(chi_squared(fileName, shifted_cipher_str), 2)
                    if curr_chi_squared < min_chi_squared:
                        min_chi_squared = curr_chi_squared
                        bucket_shift_key = i
                        plaintext_bucket_str = shifted_cipher_str
                decrypt_key.insert(j, bucket_shift_key)
                # plaintextBuckets.insert(j, plaintext_bucket_str)
                plaintextBuckets.append(plaintext_bucket_str)
                # print("decrypted plaintext bucket :[", j, "]: right-shifted by [", bucket_shift_key, "]: ", plaintextBuckets[j])

            print(f'Decryption Key = {decrypt_key}')

            # reconstitute plaintext buckets into a contiguous decrypted plaintext string
            decryptedPlaintext = ""
            for j in range(0, len(plaintextBuckets[0])):
                for i in range(0, len(plaintextBuckets)):
                    if j >= len(plaintextBuckets[i]):
                        break
                    decryptedPlaintext += plaintextBuckets[i][j]

            # print chi-squared values
            # for i in range(0, len(cleanCipherBuckets)):
            #     print("chi-squared for clean cipher bucket [", i, "]:", round(chi_squared(cleanCipherBuckets[i]),2))
            # print("chi-squared for plain text: ", round(chi_squared(plaintextDict[res[0]]),2))
            # print("chi-squared for cipher text: ", round(chi_squared(ciphertext),2))

            # split the decrypted plaintext string on spaces
            # look-up each word using bestMatchfinder(source, fuzzyWord)
            # add searched word to the final decrypted string
            tokenized_plaintext = decryptedPlaintext.split()
            badWords = 0
            for fuzzy in tokenized_plaintext:
                if fuzzy.rstrip() not in wordDict:
                    badWords += 1
            accuracy = ((len(tokenized_plaintext) - badWords) / len(tokenized_plaintext)) * 100
            accuracy = round(accuracy, 2)
            print(f'Decryption accuracy {accuracy}% found for guessedKey {guessedKey[gk]}')

            if accuracy > max_accuracy:
                best_gk = guessedKey[gk]
                max_accuracy = accuracy
                best_decrypted_plaintext = decryptedPlaintext
                best_tokenized_plaintext = tokenized_plaintext
            if accuracy > 99.9:
                break

    print(f'Best decryption accuracy {max_accuracy}% found for guessedKey {best_gk}')
    decryptedPlaintext = best_decrypted_plaintext
    tokenized_plaintext = best_tokenized_plaintext
    accuracy = max_accuracy

    badWords = 0
    badWordList = []
    finalPlaintext = ""
    for fuzzy in tokenized_plaintext:
        if fuzzy.rstrip() not in wordDict:
            badWordList.append(fuzzy)
            badWords += 1
            lookup = bestMatchFinder(fileName, wordDict, fuzzy)
            # print(f'fuzzy word: {fuzzy} --> match in dict2 {lookup}')
            finalPlaintext += lookup + ' '
        else:
            finalPlaintext += fuzzy + ' '
    print(f'Intermediate fuzzed plaintext: {finalPlaintext}')

    # lookup dict file 1 for decrypted words
    # if found, final string is detected, exit
    ptFound = False
    ptMatches = 0
    tokenized_finalPlaintext = finalPlaintext.split()
    for ptStr in plaintextStrDict:
        ptMatches = 0
        for ptWord in ptStr.split():
            for fuzzy in tokenized_finalPlaintext:
                if fuzzy.rstrip() == ptWord:
                    ptMatches += 1
                    print(f'***>>>>> ({ptMatches}) plaintext token[{fuzzy.rstrip()}] matched plaintext_dictionary_test1 word [{ptWord}]')
                    # ok - I'm convinced now that the fuzzed str is indeed in dict file 1
                    if ptMatches > 10:
                        ptFound = True
                        finalPlaintext = ptStr
                        accuracy = 100.0
                        badWords = 0
                        badWordList = []
                        print(f'***>>>>> final plain text found in plaintext_dictionary_test1 = {finalPlaintext}')
                        break
            if ptFound:
                break
        if ptFound:
            break

    # print(f'{tokenized_plaintext}')
    print(f'Input Ciphertext with random chars (len = {len(ciphertext)}):{ciphertext}')
    print(f'Clean Ciphertext (len = {len(cleanCipherString)}):{cleanCipherString}')
    print(f'Decrypted Plaintext - chi-squared analysis (len = {len(decryptedPlaintext)}):{decryptedPlaintext}')
    print(f'Accuracy of decryption = {accuracy}% {len(tokenized_plaintext) - len(badWordList)} out of {len(tokenized_plaintext)} decrypted accurately')
    print(f'Decrypted words not in Dict: {badWordList}')
    print(f'Final fuzzed Plaintext: {finalPlaintext}')

    if os.path.exists(selectedPlainTextFile):
        ptStr = open(selectedPlainTextFile, 'r').read()
        tok_ptStr = ptStr.split()
        found = 0
        tok_finStr = finalPlaintext.split()
        for finWord in tok_finStr:
            if finWord.rstrip() in tok_ptStr:
                found += 1
        fuzz_accuracy = round((found / len(tok_ptStr)) * 100, 2)
        print(f'Accuracy of fuzzer = {fuzz_accuracy}% {found} out of {len(tok_ptStr)} decrypted words fuzzed accurately')

    end = time.time()
    decr_runtime_str = str(round((end - start) * 1000, 2)) + " ms"
    now = datetime.datetime.now().strftime("%m-%d-%Y %H:%M:%S")
    print(f'**********************************************************')
    print(f'*** Runtime of the TBZ chi-squared Decryptor is {decr_runtime_str}')
    print(f'*** Run completed at: {now}')
    print(f'**********************************************************')

    mode = 'a+' if os.path.exists(fileToWriteTo) else 'w+'
    with open(fileToWriteTo, mode) as f:
        f.write('\n')
        f.write('Decryptor :: Decrypted Plaintext - chi-squared analysis\n')
        f.write(decryptedPlaintext)
        f.write('\n')
        f.write('Decryptor :: Final Plaintext from fuzzer\n')
        f.write(finalPlaintext)
        f.write('\n')
        f.write('Decryptor :: Accuracy : ')
        f.write(str(accuracy))
        f.write(' %\n')
        f.write('Decryptor :: Decryption Runtime : ')
        f.write(decr_runtime_str)
        f.write('\nDecryptor :: Run Completed at : ')
        f.write(now)
        f.write("\n\n======================================================================\n\n")
        f.close()

    return end - start
def MI_RenyiCC(x, y, type, njobs=4):
    """
    Mutual Information estimator based on the Renyi quadratic entropy and the
    Cauchy-Schwarz divergence.
    Compute the Renyi quadratic entropies hr2(p(x,y)*p(x)*p(y)), hr2 p(x,y)
    and hr2 p(x)p(y) for all types of variable couples

    Parameters
    ----------
    x, y = two variables
    type = type of the computation according to the variable types: 'dd' for
        2 discrete variables, 'cc' for 2 continuous variables or 'cd' for 2
        mixed variables
    njobs = number of parallel jobs for computation (4 by default)

    Returns
    -------
    MI_QRCS = hr2(p(x,y)*p(x)*p(y)) - 1/2 hr2 p(x,y) - 1/2 hr2 p(x)p(y),
        i.e. equal to 0 if x and y are independent

    Notes
    -----
    MI_RenyiCC_Multi may be used for bivariate variables => could be removed
    """
    N = len(x)
    if type == 'dd':
        xu, xc = at.unique(x, return_counts=True)
        yu, yc = at.unique(y, return_counts=True)
        hr2x = -np.log(np.sum((xc / N)**2))
        hr2y = -np.log(np.sum((yc / N)**2))
        freqs = DiscDensity(zip(x, y), N)
        hr2c = np.sum(
            np.dot(np.reshape((yc / N)**2, (len(yc), 1)),
                   np.reshape((xc / N)**2, (1, len(xc)))))
        hr2 = Parallel(n_jobs=njobs, backend="threading")(
            delayed(Parallel_MI_RenyiCC_d)(i, freqs[i], xu, yu, xc, yc, N)
            for i in freqs)
        s = np.sum(np.array(hr2), 0)
        hr2a = s[0]
        hr2b = s[1]
        #print("hr2a:",hr2a,"-hr2b:",hr2b,"-hr2c:",hr2c)
    elif type == 'cc':
        hr2a = 0
        hr2b = 0
        hr2c = 0
        iqrx = np.subtract(*np.percentile(x, [75, 25]))
        iqry = np.subtract(*np.percentile(y, [75, 25]))
        h = 0.85 * min(1 / np.sqrt((np.var(x) + np.var(y)) / 2),
                       (iqrx + iqry) / 2) * N**(-1 / 6)
        hr2x = 0
        hr2y = 0
        pwX = 0
        pwY = 0
        for i in zip(x, y):
            hr2x += ParzenWindow(i[0] - x,
                                 0.9 * min(np.std(x), iqrx) * N**(-1 / 5)**2)
            hr2y += ParzenWindow(i[1] - y,
                                 0.9 * min(np.std(x), iqrx) * N**(-1 / 5)**2)
            pwx = ParzenWindow(i[0] - x, h)
            pwy = ParzenWindow(i[1] - y, h)
            hr2a += pwx * pwy
            w = zip(i[0] - x, i[1] - y)
            hr2b += ParzenWindow(w, h, 2)
            pwX += pwx
            pwY += pwy
        hr2c += (1 / N**4) * (pwX * pwY)
        hr2a = (1 / N**3) * hr2a
        hr2b = (1 / N**2) * hr2b
        #print("-hr2a:",hr2a,"-hr2b:",hr2b,"-hr2c:",hr2c,"-pw:",[pwX,pwY])
        hr2x = -np.log((1 / N**2) * hr2x)
        hr2y = -np.log((1 / N**2) * hr2y)
    elif type == "cd":
        yu, yc = at.unique(y, return_counts=True)
        hr2y = -np.log(np.sum((yc / N)**2))
        xyu = defaultdict(list)
        iqrx = np.subtract(*np.percentile(x, [75, 25]))
        hx = 0.9 * min(np.std(x), iqrx) * N**(-1 / 5)**2
        hr2x = 0
        hr2a = 0
        hr2b = 0
        hr2c = 0
        nxyu = 0
        for i in zip(x, y):
            xyu[i[1]].append(i[0])
            hr2x += ParzenWindow(i[0] - x, hx)
        for yui in yu:
            nxyui = len(xyu[yui])
            varxyui = np.var(xyu[yui])
            iqrxyui = np.subtract(*np.percentile(xyu[yui], [75, 25]))
            h = 0.85 * min(1 / np.sqrt((np.var(x) + varxyui) / 2),
                           (iqrx + iqrxyui) / 2) * N**(-1 / 6)
            hr2a += nxyui * np.sum(ParzenWindow(j - x, hx) for j in xyu[yui])
            hr2b += np.sum(ParzenWindow(j - xyu[yui], hx) for j in xyu[yui])
            nxyu += nxyui**2
        hr2c = (1 / N**4) * nxyu * np.sum(ParzenWindow(xi - x, hx) for xi in x)
        #print("hr2a:",hr2a,"-hr2b:",hr2b,"-hr2c:",hr2c)
        hr2a = (1 / N**3) * hr2a
        hr2b = (1 / N**2) * hr2b
        hr2x = -np.log((1 / N**2) * hr2x)
    lhr2a = -np.log(hr2a)
    lhr2b = -np.log(hr2b)
    lhr2c = -np.log(hr2c)
    MI_QRCS = lhr2a - 0.5 * lhr2b - 0.5 * lhr2c
    return MI_QRCS
def test_unique(self):

    def check_all(a, b, i1, i2, c, dt):
        base_msg = 'check {0} failed for type {1}'

        msg = base_msg.format('values', dt)
        v = unique(a)
        assert_array_equal(v, b, msg)

        msg = base_msg.format('return_index', dt)
        v, j = unique(a, 1, 0, 0)
        assert_array_equal(v, b, msg)
        assert_array_equal(j, i1, msg)

        msg = base_msg.format('return_inverse', dt)
        v, j = unique(a, 0, 1, 0)
        assert_array_equal(v, b, msg)
        assert_array_equal(j, i2, msg)

        msg = base_msg.format('return_counts', dt)
        v, j = unique(a, 0, 0, 1)
        assert_array_equal(v, b, msg)
        assert_array_equal(j, c, msg)

        msg = base_msg.format('return_index and return_inverse', dt)
        v, j1, j2 = unique(a, 1, 1, 0)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i1, msg)
        assert_array_equal(j2, i2, msg)

        msg = base_msg.format('return_index and return_counts', dt)
        v, j1, j2 = unique(a, 1, 0, 1)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i1, msg)
        assert_array_equal(j2, c, msg)

        msg = base_msg.format('return_inverse and return_counts', dt)
        v, j1, j2 = unique(a, 0, 1, 1)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i2, msg)
        assert_array_equal(j2, c, msg)

        msg = base_msg.format(('return_index, return_inverse '
                               'and return_counts'), dt)
        v, j1, j2, j3 = unique(a, 1, 1, 1)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i1, msg)
        assert_array_equal(j2, i2, msg)
        assert_array_equal(j3, c, msg)

    a = [5, 7, 1, 2, 1, 5, 7]*10
    b = [1, 2, 5, 7]
    i1 = [2, 3, 0, 1]
    i2 = [2, 3, 0, 1, 0, 2, 3]*10
    c = np.multiply([2, 1, 2, 2], 10)

    # test for numeric arrays
    types = []
    types.extend(np.typecodes['AllInteger'])
    types.extend(np.typecodes['AllFloat'])
    types.append('datetime64[D]')
    types.append('timedelta64[D]')
    for dt in types:
        aa = np.array(a, dt)
        bb = np.array(b, dt)
        check_all(aa, bb, i1, i2, c, dt)

    # test for object arrays
    dt = 'O'
    aa = np.empty(len(a), dt)
    aa[:] = a
    bb = np.empty(len(b), dt)
    bb[:] = b
    check_all(aa, bb, i1, i2, c, dt)

    # test for structured arrays
    dt = [('', 'i'), ('', 'i')]
    aa = np.array(list(zip(a, a)), dt)
    bb = np.array(list(zip(b, b)), dt)
    check_all(aa, bb, i1, i2, c, dt)

    # test for ticket #2799
    aa = [1. + 0.j, 1 - 1.j, 1]
    assert_array_equal(np.unique(aa), [1. - 1.j, 1. + 0.j])

    # test for ticket #4785
    a = [(1, 2), (1, 2), (2, 3)]
    unq = [1, 2, 3]
    inv = [0, 1, 0, 1, 1, 2]
    a1 = unique(a)
    assert_array_equal(a1, unq)
    a2, a2_inv = unique(a, return_inverse=True)
    assert_array_equal(a2, unq)
    assert_array_equal(a2_inv, inv)
"\n \n----------> ...in practice: boundary nodes (from boundary elements) \n" ) print("\n line elements with Dirichlet tag\n", mesh.cell_sets_dict["Dirichlet"]["line"]) name = "Dirichlet" tag = mesh.field_data[name][0] dim = mesh.field_data[name][1] if dim == 0: # array containing indices of elements in the boundary on_boundary = np.nonzero( mesh.cell_data_dict["gmsh:physical"]["vertex"] == tag)[0] # array containing indices of nodes in the boundary nodes = unique(mesh.cells_dict["vertex"][on_boundary]) elif dim == 1: on_boundary = np.nonzero( mesh.cell_data_dict["gmsh:physical"]["line"] == tag)[0] nodes = unique(mesh.cells_dict["line"][on_boundary]) print("\n nodes related to tag Dirichlet\n", nodes) for n in nodes: print("\nnode #", n, "@", points[n]) print("\n\n") print("\n node entries in dictionary with tag \"Points\"", mesh.cell_sets_dict["Points"]["vertex"]) name = "Points" tag = mesh.field_data[name][0]
def fit(self, X, y,
        sample_mask=None, X_argsorted=None,
        check_input=True, sample_weight=None):
    random_state = check_random_state(self.random_state)

    # Deprecations
    if sample_mask is not None:
        warn("The sample_mask parameter is deprecated as of version 0.14 "
             "and will be removed in 0.16.", DeprecationWarning)

    if X_argsorted is not None:
        warn("The X_argsorted parameter is deprecated as of version 0.14 "
             "and will be removed in 0.16.", DeprecationWarning)

    # Convert data
    if check_input:
        X = check_array(X, dtype=DTYPE, accept_sparse="csc")
        if issparse(X):
            X.sort_indices()

            if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:
                raise ValueError("No support for np.int64 index based "
                                 "sparse matrices")

    # Determine output settings
    n_samples, self.n_features_ = X.shape
    is_classification = isinstance(self, ClassifierMixin)

    y = np.atleast_1d(y)

    if y.ndim == 1:
        # reshape is necessary to preserve the data contiguity against vs
        # [:, np.newaxis] that does not.
        y = np.reshape(y, (-1, 1))

    self.n_outputs_ = y.shape[1]

    if is_classification:
        y = np.copy(y)

        self.classes_ = []
        self.n_classes_ = []

        for k in six.moves.range(self.n_outputs_):
            classes_k, y[:, k] = unique(y[:, k], return_inverse=True)
            self.classes_.append(classes_k)
            self.n_classes_.append(classes_k.shape[0])
    else:
        self.classes_ = [None] * self.n_outputs_
        self.n_classes_ = [1] * self.n_outputs_

    self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)

    max_depth = 1
    max_features = 10

    if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
        y = np.ascontiguousarray(y, dtype=DOUBLE)

    if len(y) != n_samples:
        raise ValueError("Number of labels=%d does not match "
                         "number of samples=%d" % (len(y), n_samples))
    if self.min_samples_split <= 0:
        raise ValueError("min_samples_split must be greater than zero.")
    if self.min_samples_leaf <= 0:
        raise ValueError("min_samples_leaf must be greater than zero.")
    if max_depth <= 0:
        raise ValueError("max_depth must be greater than zero. ")
    if not (0 < max_features <= self.n_features_):
        raise ValueError("max_features must be in (0, n_features]")

    if sample_weight is not None:
        if (getattr(sample_weight, "dtype", None) != DOUBLE or
                not sample_weight.flags.contiguous):
            sample_weight = np.ascontiguousarray(sample_weight, dtype=DOUBLE)
        if len(sample_weight.shape) > 1:
            raise ValueError("Sample weights array has more "
                             "than one dimension: %d" %
                             len(sample_weight.shape))
        if len(sample_weight) != n_samples:
            raise ValueError("Number of weights=%d does not match "
                             "number of samples=%d" %
                             (len(sample_weight), n_samples))

    if self.method == 'default':
        self.tree_ = _fit_regressor_stump(X, y, sample_weight, X_argsorted)
    elif self.method == 'threaded':
        self.tree_ = _fit_regressor_stump_threaded(X, y, sample_weight,
                                                   X_argsorted)
    elif self.method == 'c':
        self.tree_ = _fit_regressor_stump_c_ext(X, y, sample_weight,
                                                X_argsorted)
    elif self.method == 'c_threaded':
        self.tree_ = _fit_regressor_stump_c_ext_threaded(X, y, sample_weight,
                                                         X_argsorted)
    else:
        self.tree_ = _fit_regressor_stump(X, y, sample_weight, X_argsorted)

    if self.n_outputs_ == 1:
        self.n_classes_ = self.n_classes_[0]
        self.classes_ = self.classes_[0]

    return self
def test_unique_1d_with_axis(self, axis):
    x = np.array([4, 3, 2, 3, 2, 1, 2, 2])
    uniq = unique(x, axis=axis)
    assert_array_equal(uniq, [1, 2, 3, 4])
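# A self-contained illustration of the behaviour the parametrized test above
# checks: for a 1-D array, axis=None, axis=0 and axis=-1 all reduce to the
# same sorted unique values.
import numpy as np

x = np.array([4, 3, 2, 3, 2, 1, 2, 2])
for ax in (None, 0, -1):
    print(ax, np.unique(x, axis=ax))  # -> [1 2 3 4] in every case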