def bfs(src, dst, seeds, printLayers=False):
    """Breadth-first search over an edge list (src, dst) starting from seeds.

    Returns a tuple (F, V) where F is the list of frontier arrays (one per
    BFS layer) and V is the pdarray of all visited vertices.
    """
    frontier = ak.unique(seeds)    # vertices in the current layer of the bfs
    visited = ak.unique(frontier)  # vertices already visited
    layers = [frontier]            # one frontier per BFS layer
    while frontier.size != 0:
        if printLayers:
            print("Z.size = ", frontier.size, " Z = ", frontier)
        # select edges whose source vertex lies in the current frontier
        edge_mask = ak.in1d(src, frontier)
        # unique destination vertices reachable along those edges
        reached = ak.unique(dst[edge_mask])
        # next frontier: reached vertices not yet visited
        frontier = ak.setdiff1d(reached, visited)
        # fold the new frontier into the visited set
        visited = ak.union1d(visited, frontier)
        layers.append(frontier)
    return (layers, visited)
def apply_permutation(self, perm):
    """
    Apply a permutation to an entire DataFrame.

    This may be useful if you want to unsort a DataFrame, or to apply an
    arbitrary permutation such as the inverse of a sorting permutation.

    Parameters
    ----------
    perm : ak.pdarray
        A permutation array. Should be the same size as the data arrays,
        and should consist of the integers [0, size-1] in some order. Very
        minimal testing is done to ensure this is a permutation.

    Raises
    ------
    ValueError
        If perm does not span [0, size-1] or contains duplicate values.

    See Also
    --------
    sort
    """
    # A valid permutation must cover exactly [0, size-1] ...
    if perm.min() != 0 or perm.max() != perm.size - 1:
        raise ValueError("The indicated permutation is invalid.")
    # ... and contain no duplicate indices.
    if ak.unique(perm).size != perm.size:
        raise ValueError("The indicated permutation is invalid.")
    # Reindex every column in place.
    for key in self.data:
        self[key] = self[key][perm]
def _merge_all(self, array):
    """Merge this index with every index in `array`, dropping duplicates.

    Each element of `array` is type-checked before its index values are
    concatenated; the combined values are deduplicated and wrapped via the
    callback appropriate to this index's type.
    """
    merged = self.index
    make_index = aku.get_callback(merged)
    for other in array:
        self._check_types(other)
        merged = aku.concatenate([merged, other.index], ordered=False)
    return Index(make_index(ak.unique(merged)))
def conn_comp(src, dst, printCComp=False, printLayers=False):
    """Find connected components via repeated BFS.

    Repeatedly runs bfs() from the lowest-numbered unvisited vertex and
    collects each resulting visited set as one component.

    Returns a list of pdarrays, one per component, each holding that
    component's vertices.
    """
    # NOTE(review): only src vertices are considered unvisited candidates;
    # presumably the edge list is symmetric — confirm with the caller.
    remaining = ak.unique(src)
    if printCComp:
        print("unvisited size = ", remaining.size, remaining)
    components = []
    while remaining.size > 0:
        # lowest-numbered vertex acts as the component's representative
        representative = remaining[0]
        _, visited = bfs(src, dst, ak.array([representative]), printLayers)
        components.append(visited)
        # remove the newly discovered component from the unvisited set
        remaining = ak.setdiff1d(remaining, visited)
        if printCComp:
            print(" visited size = ", visited.size, visited)
            print("unvisited size = ", remaining.size, remaining)
    return components
def invert_permutation(perm):
    """
    Find the inverse of a permutation array.

    Parameters
    ----------
    perm : ak.pdarray
        The permutation array.

    Returns
    -------
    ak.array
        The inverse of the permutation array.

    Raises
    ------
    ValueError
        If perm contains duplicate values.
    """
    # A sum-based validity test (sum == n*(n-1)/2) was deliberately not used
    # here because it can overflow on large arrays; uniqueness is checked
    # instead.
    if perm.size != ak.unique(perm).size:
        raise ValueError("The array is not a permutation.")
    # Co-sorting (perm, 0..n-1) by perm yields the inverse permutation.
    return ak.coargsort([perm, ak.arange(0, perm.size)])
def invert_permutation(perm):
    """
    Find the inverse of a permutation array.

    Parameters
    ----------
    perm : ak.pdarray
        The permutation array.

    Returns
    -------
    ak.pdarray
        The inverse of the permutation array.

    Raises
    ------
    ValueError
        If perm is not a permutation of a consecutive range of integers.
    """
    # Test if the array is actually a permutation: it must have all-unique
    # values AND span exactly perm.size consecutive integers. The original
    # joined the two failure conditions with `and`, so an array failing only
    # one test (e.g. [0, 0, 2]: duplicates, but max-min+1 == size) was
    # wrongly accepted. Either failure alone invalidates the array, so the
    # conditions must be joined with `or`.
    rng = perm.max() - perm.min()
    if (ak.unique(perm).size != perm.size) or (perm.size != rng + 1):
        raise ValueError("The array is not a permutation.")
    # Co-sorting (perm, 0..n-1) by perm yields the inverse permutation.
    return ak.coargsort([perm, ak.arange(0, perm.size)])
def right_align(left, right):
    """
    Map two arrays of sparse values to the 0-up index set implied by the
    right array, discarding values from left that do not appear in right.

    Parameters
    ----------
    left : pdarray
        Left-hand identifiers
    right : pdarray
        Right-hand identifiers that define the index

    Returns
    -------
    keep : pdarray, bool
        Logical index of left-hand values that survived
    aligned : (pdarray, pdarray)
        Left and right arrays with values replaced by 0-up indices
    """
    right_uniques = ak.unique(right)
    # mark left values that also occur in right
    keep = ak.in1d(left, right_uniques)
    surviving = left[keep]
    return keep, align(surviving, right)
def __init__(self, segments, values, copy=False, lengths=None, grouping=None):
    """
    An array of variable-length arrays, also called a skyline array or
    ragged array.

    Parameters
    ----------
    segments : pdarray, int64
        Start index of each sub-array in the flattened values array
    values : pdarray
        The flattened values of all sub-arrays
    copy : bool
        If True, make a copy of the input arrays; otherwise, just store
        a reference.

    Returns
    -------
    SegArray
        Data structure representing an array whose elements are
        variable-length arrays.

    Notes
    -----
    Keyword args 'lengths' and 'grouping' are not user-facing. They are
    used by the attach method.
    """
    # Validate segment offsets: int64, unique, sorted, in-range.
    if not (isinstance(segments, ak.pdarray) and segments.dtype == ak.int64):
        raise TypeError("Segments must be int64 pdarray")
    if (ak.unique(segments).size != segments.size) or not ak.is_sorted(segments):
        raise ValueError("Segments must be unique and in sorted order")
    if segments.size > 0:
        if segments.min() != 0 or segments.max() >= values.size:
            raise ValueError(
                "Segments must start at zero and be less than values.size")
    elif values.size > 0:
        raise ValueError(
            "Cannot have non-empty values with empty segments")
    # Store copies or references as requested.
    self.segments = segments[:] if copy else segments
    self.values = values[:] if copy else values
    self.size = segments.size
    self.valsize = values.size
    # Derive per-sub-array lengths unless supplied by attach().
    self.lengths = self._get_lengths() if lengths is None else lengths
    self.dtype = values.dtype
    if grouping is not None:
        self.grouping = grouping
    elif self.size == 0:
        self.grouping = ak.GroupBy(ak.zeros(0, dtype=ak.int64))
    else:
        # Treat each sub-array as a group, for grouped aggregations
        self.grouping = ak.GroupBy(
            ak.broadcast(self.segments, ak.arange(self.size), self.valsize))
# print out the pdarrays in the dict and their types print(nfDF['start'],nfDF['start'].dtype) print(nfDF['srcIP'],type(nfDF['srcIP'])) # Strings dosen't have a dtype?!? print(nfDF['dstIP'],type(nfDF['dstIP'])) # Strings dosen't have a dtype?!? print(nfDF['srcPort'],nfDF['srcPort'].dtype) print(nfDF['dstPort'],nfDF['dstPort'].dtype) print(nfDF) # print oput the symbols the server knows about print(ak.info(ak.AllSymbols)) # print out how much memory is being used by the server print("mem used: ", ak.get_mem_used()) # get the unique srcIP and the counts for each unique srcIP u,c = ak.unique(nfDF['srcIP'],return_counts=True) print("unique values = ", u.size,u) print("value counts = ", c.size,c) # get the unique dstIP and the counts for each unique dstIP u,c = ak.unique(nfDF['dstIP'],return_counts=True) print("unique values = ", u.size,u) print("value counts = ", c.size,c) # get the unique srcPort and the counts for each unique srcPort u,c = ak.unique(nfDF['srcPort'],return_counts=True) print("unique values = ", u.size,u) print("value counts = ", c.size,c) # get the unique dstPort and the counts for each unique dstPort u,c = ak.unique(nfDF['dstPort'],return_counts=True)
# NOTE(review): `inds`, `matches`, `strings`, `cat`, and `test_strings` are
# defined before this chunk; `inds` presumably accumulates per-word equality
# masks inside a loop upstream — confirm against the full file.
inds |= (strings == word)
assert ((inds == matches).all())
print("in1d and iter passed")
# argsort: Arkouda sort order must agree with numpy's
akperm = ak.argsort(strings)
aksorted = strings[akperm].to_ndarray()
npsorted = np.sort(test_strings)
assert ((aksorted == npsorted).all())
catperm = ak.argsort(cat)
catsorted = cat[catperm].to_ndarray()
assert ((catsorted == npsorted).all())
print("argsort passed")
# unique: Strings and Categorical should produce the same unique set
akuniq = ak.unique(strings)
catuniq = ak.unique(cat)
akset = set(akuniq.to_ndarray())
catset = set(catuniq.to_ndarray())
assert (akset == catset)
# There should be no duplicates
assert (akuniq.size == len(akset))
npset = set(np.unique(test_strings))
# When converted to a set, should agree with numpy
assert (akset == npset)
print("unique passed")
# groupby
g = ak.GroupBy(strings)
gc = ak.GroupBy(cat)
# Unique keys should be same result as ak.unique
# NOTE(review): ak.v appears to toggle verbose output — confirm semantics.
ak.v = False
# boolean-mask indexing on an int range and on a float linspace
a = ak.arange(0,10,1)
b = a[a<5]
a = ak.linspace(0,9,10)
b = a[a<5]
print(b)
ak.v = True
# raise the iteration threshold so list(a) materializes every element
ak.pdarrayIterThresh = 1000
a = ak.arange(0,10,1)
print(list(a))
ak.v = False
# 40 random ints in [10, 30), with unique values and a 20-bin histogram
a = ak.randint(10,30,40)
u = ak.unique(a)
h = ak.histogram(a,bins=20)
print(a)
print(h.size,h)
print(u.size,u)
ak.v = False
# same experiment with 50 values
a = ak.randint(10,30,50)
h = ak.histogram(a,bins=20)
print(a)
print(h)
ak.v = False
# 50 random booleans; sum() counts the Trues
a = ak.randint(0,2,50,dtype=ak.bool)
print(a)
print(a.sum())
def _merge(self, other):
    """Merge this index with `other`, dropping duplicate values.

    `other` is type-checked first; the combined values are deduplicated and
    wrapped via the callback appropriate to this index's type.
    """
    self._check_types(other)
    combined = aku.concatenate([self.index, other.index], ordered=False)
    make_index = aku.get_callback(self.index)
    return Index(make_index(ak.unique(combined)))