def score_anomalies(self, prior_idx):
    """
    Score every anomalous sequence found in this window.

    Each run of consecutive indices in ``self.i_anom`` is scored by the
    largest distance between a smoothed error and its threshold inside
    the run, divided by ``self.mean_e_s + self.sd_e_s``. Regular and
    inverted errors are scored separately and the larger value is kept.
    One dict per run is appended to ``self.anom_scores``.

    Args:
        prior_idx (int): starting index of window within full set
            of test values for channel
    """
    runs = list(map(list, mit.consecutive_groups(self.i_anom)))

    for run in runs:
        first, last = run[0], run[-1]
        span = range(first, last + 1)
        scale = self.mean_e_s + self.sd_e_s

        regular = max(abs(self.e_s[k] - self.epsilon) / scale for k in span)
        inverted = max(
            abs(self.e_s_inv[k] - self.epsilon_inv) / scale for k in span)

        self.anom_scores.append({
            "start_idx": first + prior_idx,
            "end_idx": last + prior_idx,
            # the larger score indicates whether the anomaly came from
            # the regular or the inverted errors
            "score": max(regular, inverted),
        })
def seizure_event(label_test, label):
    """Summarise event-level seizure detection.

    Consecutive positions where ``label_test`` equals 1 form one seizure
    segment. A segment counts as detected when ``label`` contains at least
    one 1 inside it.

    Args:
        label_test: sequence of ground-truth labels (1 marks seizure).
        label: sequence of predicted labels, indexable by the positions of
            ``label_test``.

    Returns:
        tuple: ``(ss_length, seizure_count, detection_rate, average_seconds)``
        where ``ss_length`` is the number of true segments, ``seizure_count``
        the number of detected segments, ``detection_rate`` their ratio and
        ``average_seconds`` the mean 1-based offset of the first predicted 1
        within each detected segment (assumes one label per second -- TODO
        confirm with caller). ``detection_rate`` is 0.0 when there is no
        segment and ``average_seconds`` is NaN when nothing was detected,
        instead of raising ZeroDivisionError.
    """
    test_index = [pos for pos, value in enumerate(label_test) if value == 1]
    if not test_index:
        # No ground-truth seizure: nothing to detect and nothing to average.
        return 0, 0, 0.0, float("nan")

    seizure_segment = [
        list(group) for group in mit.consecutive_groups(test_index)
    ]
    ss_length = len(seizure_segment)

    # For every detected segment, remember how many labels elapsed (1-based)
    # before the first positive prediction.
    seconds_list = []
    for segment in seizure_segment:
        events = [label[i] for i in segment]
        if 1 in events:
            seconds_list.append(events.index(1) + 1)

    seizure_count = len(seconds_list)
    if seizure_count:
        average_seconds = sum(seconds_list) / seizure_count
    else:
        average_seconds = float("nan")

    return ss_length, seizure_count, seizure_count / ss_length, average_seconds
def compare_to_epsilon(self, errors_all, inverse=False):
    """
    Compare smoothed error values to epsilon (error threshold) and group
    consecutive errors together into sequences.

    On success the results are stored on the instance: ``i_anom``,
    ``E_seq`` and ``non_anom_max`` (or their ``*_inv`` counterparts when
    ``inverse`` is true). When the method returns early, none of these
    attributes is assigned here.

    Args:
        errors_all (obj): Errors class object containing list of all
            previously identified anomalies in test set
        inverse (bool): when true, work on ``self.e_s_inv`` and
            ``self.epsilon_inv`` instead of ``self.e_s`` / ``self.epsilon``
    """

    e_s = self.e_s if not inverse else self.e_s_inv
    epsilon = self.epsilon if not inverse else self.epsilon_inv

    # Check: scale of errors compared to values too small?
    # NOTE(review): this guard always inspects self.e_s, also when
    # inverse=True -- confirm that is intended.
    if not (self.sd_e_s > (.05 * self.sd_values) or max(self.e_s)
            > (.05 * self.inter_range)) or not max(self.e_s) > 0.05:
        return

    # positions at or above the threshold that are also non-negligible
    # relative to inter_range
    i_anom = np.argwhere((e_s >= epsilon) &
                         (e_s > 0.05 * self.inter_range)).reshape(-1, )

    if len(i_anom) == 0:
        return

    # widen every hit by _error_buffer positions on both sides, then drop
    # whatever falls outside the window
    buffer = np.arange(1, self._error_buffer + 1)
    i_anom = np.sort(
        np.concatenate(
            (i_anom, np.array([i + buffer for i in i_anom]).flatten(),
             np.array([i - buffer for i in i_anom]).flatten())))
    i_anom = i_anom[(i_anom < len(e_s)) & (i_anom >= 0)]

    # if it is first window, ignore initial errors (need some history)
    if self.window_num == 0:
        i_anom = i_anom[i_anom >= self.num_to_ignore]
    else:
        # later windows: keep only the last _batch_size positions
        i_anom = i_anom[i_anom >= len(e_s) - self._batch_size]

    i_anom = np.sort(np.unique(i_anom))

    # capture max of non-anomalous values below the threshold
    # (used in filtering process)
    # indices are shifted by batch_position so they can be compared with
    # errors_all.i_anom, then shifted back to window-relative positions
    batch_position = self.window_num * self._batch_size
    window_indices = np.arange(0, len(e_s)) + batch_position
    adj_i_anom = i_anom + batch_position
    window_indices = np.setdiff1d(window_indices,
                                  np.append(errors_all.i_anom, adj_i_anom))
    candidate_indices = np.unique(window_indices - batch_position)
    non_anom_max = np.max(np.take(e_s, candidate_indices))

    # group anomalous indices into continuous sequences
    groups = [list(group) for group in mit.consecutive_groups(i_anom)]
    # single-index groups are not kept as sequences
    E_seq = [(g[0], g[-1]) for g in groups if not g[0] == g[-1]]

    if inverse:
        self.i_anom_inv = i_anom
        self.E_seq_inv = E_seq
        self.non_anom_max_inv = non_anom_max
    else:
        self.i_anom = i_anom
        self.E_seq = E_seq
        self.non_anom_max = non_anom_max
def label_sequential_regions(inlist):
    """Input a list of labeled tuples and return a dictionary of sequentially labeled regions.

    Regions are numbered per label, and the dictionary is filled label by
    label in order of first appearance (not in positional order).

    Args:
        inlist (list): A list of tuples with the first number representing the index and the second the index label.

    Returns:
        dict: Dictionary of labeled regions. Empty when ``inlist`` is empty.

    Examples:

        >>> label_sequential_regions([(1, 'O'), (2, 'O'), (3, 'O'), (4, 'M'), (5, 'M'), (6, 'I'), (7, 'M'), (8, 'O'), (9, 'O')])
        {'O1': [1, 2, 3], 'O2': [8, 9], 'M1': [4, 5], 'M2': [7], 'I1': [6]}

    """
    if len(inlist) == 0:
        # An empty frame has no column 0, so set_index(0) would raise.
        return {}

    import more_itertools as mit

    df = pd.DataFrame(inlist).set_index(0)

    labeled = {}
    for label in df[1].unique():
        positions = df[df[1] == label].index.tolist()
        for number, group in enumerate(mit.consecutive_groups(positions), start=1):
            labeled['{}{}'.format(label, number)] = list(group)

    return labeled
def find_objects(array):
    """Pick a heading through the widest unobstructed pixel gap.

    The unique rows of ``array`` are paired up and passed to ``calc_angle``,
    which returns the angle list and the (possibly filtered) pairs. For each
    returned pair, the 40 pixel columns ``[x - 20, x + 20)`` around the first
    coordinate of its first point are marked as blocked. The centre of the
    widest remaining run of columns in 1..200 is returned as the direction.

    Args:
        array: array-like of points; presumably integer pixel coordinates
            with x first -- TODO confirm against caller.

    Returns:
        tuple: ``(angle_lst, array, direction)``. ``direction`` stays at the
        default of 100 when no free run wider than a single column exists.
    """
    array = np.unique(array, axis=0)
    array = list(combinations(list(array), 2))
    angle_lst, array = calc_angle(array)

    blocked = set()
    for line in array:
        x = line[0][0]
        blocked.update(range(x - 20, x + 20))
    pixels = [p for p in range(1, 201) if p not in blocked]

    ranges = [
        (run[0], run[-1])
        for run in map(list, mit.consecutive_groups(pixels))
    ]

    biggest = 0
    direction = 100
    for start, end in ranges:
        width = end - start
        if width > biggest:
            # Track the best width so far; without this every non-empty
            # range overwrote the direction and the last one always won.
            biggest = width
            direction = start + width / 2

    return angle_lst, array, direction
def process_batches(self, channel):
    """
    Top-level function for the Error class that loops through batches
    of values for a channel.

    For every window an ErrorWindow is built, thresholds are computed for
    regular and inverted errors, anomalies are detected, pruned and scored,
    and the results are accumulated in ``self.i_anom`` and
    ``self.anom_scores``. Afterwards ``self.E_seq`` is derived from the
    accumulated indices and ``self.merge_scores()`` is called.

    Args:
        channel (obj): Channel class object containing train/test data
            for X,y for a single channel
    """

    self.adjust_window_size(channel)

    for i in range(0, self.n_windows + 1):
        prior_idx = i * self.config.batch_size
        idx = (self.config.window_size * self.config.batch_size) \
              + (i * self.config.batch_size)
        if i == self.n_windows:
            # last window runs to the end of the test values
            idx = channel.y_test.shape[0]

        window = ErrorWindow(channel, self.config, prior_idx, idx, self, i)

        window.find_epsilon()
        window.find_epsilon(inverse=True)

        window.compare_to_epsilon(self)
        window.compare_to_epsilon(self, inverse=True)

        # nothing found in either direction: skip pruning and scoring
        if len(window.i_anom) == 0 and len(window.i_anom_inv) == 0:
            continue

        window.prune_anoms()
        window.prune_anoms(inverse=True)

        # everything may have been pruned away
        if len(window.i_anom) == 0 and len(window.i_anom_inv) == 0:
            continue

        # merge regular and inverted anomaly indices for this window
        window.i_anom = np.sort(
            np.unique(np.append(window.i_anom,
                                window.i_anom_inv))).astype('int')
        window.score_anomalies(prior_idx)

        # update indices to reflect true indices in full set of values
        self.i_anom = np.append(self.i_anom, window.i_anom + prior_idx)
        self.anom_scores = self.anom_scores + window.anom_scores

    if len(self.i_anom) > 0:
        # group anomalous indices into continuous sequences
        groups = [
            list(group) for group in mit.consecutive_groups(self.i_anom)
        ]
        # single-index groups are not kept as sequences
        self.E_seq = [(int(g[0]), int(g[-1])) for g in groups
                      if not g[0] == g[-1]]

        # additional shift is applied to indices so that they represent the
        # position in the original data array, obtained from the .npy files,
        # and not the position on y_test (See PR #27).
        self.E_seq = [(e_seq[0] + self.config.l_s,
                       e_seq[1] + self.config.l_s) for e_seq in self.E_seq]

        self.merge_scores()
def update_utxos(self, utxos_to_add: List[UtxoType], utxos_to_delete: List[Tuple[int, int]]):
    """Apply a batch of UTXO removals and additions to the model.

    Removals are looked up through ``self.utxo_by_id``; unknown ids are
    skipped. Rows are removed in contiguous blocks, highest rows first, each
    block wrapped in ``beginRemoveRows`` / ``endRemoveRows``. Additions are
    sorted in place by ``block_height`` (descending) and handed to
    ``self.add_utxo`` inside a single ``beginInsertRows`` / ``endInsertRows``
    pair starting at row 0.

    Args:
        utxos_to_add: UTXO objects to insert; the list is sorted in place.
        utxos_to_delete: ids of the UTXOs to remove.
    """
    if utxos_to_delete:
        doomed_rows = []
        for utxo_id in utxos_to_delete:
            utxo = self.utxo_by_id.get(utxo_id)
            if not utxo:
                continue
            row = self.utxos.index(utxo)
            if row not in doomed_rows:
                doomed_rows.append(row)
            del self.utxo_by_id[utxo_id]

        # Walk from the bottom of the table upwards so that removing one
        # block does not shift the rows of the blocks still to be removed.
        doomed_rows.sort(reverse=True)
        for run in consecutive_groups(doomed_rows, ordering=lambda x: -x):
            rows = list(run)
            # items are sorted in reversed order
            top_row, bottom_row = rows[-1], rows[0]
            self.beginRemoveRows(QModelIndex(), top_row, bottom_row)
            del self.utxos[top_row:bottom_row + 1]
            self.endRemoveRows()

    if utxos_to_add:
        # in the model, the rows are sorted by the number of confirmations
        # in the descending order, so put the new ones in the right place
        utxos_to_add.sort(key=lambda x: x.block_height, reverse=True)
        first_row = 0
        last_row = first_row + len(utxos_to_add) - 1
        self.beginInsertRows(QModelIndex(), first_row, last_row)
        try:
            for position, utxo in enumerate(utxos_to_add):
                self.add_utxo(utxo, position)
        finally:
            self.endInsertRows()
def save_detected_frames(detection_path, output_path, delta = 10):
    """
    Grab list of frames that contain touch indicators in them. Can adjust how
    many frames before and after start and end of touch indicators for
    context of interaction by adjusting delta.

    The detected frame ids are read from
    ``detected_frames/detected-frames.json`` next to ``detection_path``
    (key ``screenId`` of every entry). The selected frames are copied from
    ``extracted_frames/<id>.jpg`` into ``output_path`` and renamed to their
    position in the sorted selection.

    Args:
        detection_path (Path): path whose parent directory holds the
            ``detected_frames`` and ``extracted_frames`` folders.
        output_path (Path): destination directory; created if missing (its
            parent must already exist).
        delta (int): number of context frames added before and after every
            run of consecutive detected frames.
    """
    output_path.mkdir(exist_ok = True)
    with open(detection_path.parent/'detected_frames/detected-frames.json') as f:
        detections = json.load(f)

    # Sort and de-duplicate first: consecutive_groups only recognises runs
    # in ascending input.
    detected = sorted({d['screenId'] for d in detections})

    wanted = set(detected)
    for group in mit.consecutive_groups(detected):
        run = list(group)
        start, end = run[0], run[-1]
        wanted.update(range(start - delta, start))
        wanted.update(range(end + 1, end + delta + 1))

    # Frame numbering starts at 1; context may have reached below that.
    frames = sorted(frame for frame in wanted if frame >= 1)

    extracted_dir = detection_path.parent/'extracted_frames'
    for i, frame in enumerate(frames):
        try:
            copyfile(extracted_dir/f'{frame:04}.jpg', output_path/f'{i:04}.jpg')
        except OSError:
            # Best effort: a context frame may not exist on disk. Only
            # file-system errors are skipped; anything else propagates.
            continue
def find_ranges(iterable):
    """Yield the runs of consecutive numbers found in *iterable*.

    A run holding a single number is yielded as that bare number; a longer
    run is yielded as a ``(first, last)`` tuple.
    """
    for run in map(list, mit.consecutive_groups(iterable)):
        first, last = run[0], run[-1]
        yield first if len(run) == 1 else (first, last)
def compute_threshold(smoothed_errors, error_buffer, sd_limit=12.0):
    """Helper method for `extract_anomalies` method.

    Searches for the number of standard deviations ``z`` above the mean of
    ``smoothed_errors`` that makes the best anomaly threshold
    ``epsilon = mean + z * std``.

    The threshold is determined dynamically by testing multiple values of
    ``z``, in increments of 0.5 from 2.5 up to (excluding) ``sd_limit``, the
    range that the NASA paper found to be good. Each candidate is scored by
    the relative drop in mean and standard deviation obtained when the values
    above epsilon are removed, divided by
    ``len(sequences) ** 2 + len(anomalous points)``.

    Args:
        smoothed_errors: sequence of smoothed error values.
        error_buffer (int): every value above epsilon also marks the
            ``error_buffer - 1`` positions on each side of it as anomalous.
        sd_limit (float): exclusive upper bound for ``z``; also the value
            returned when no candidate scores above zero.

    Returns:
        tuple: ``(max_score, sd_threshold)`` -- the best score found (0 if
        none) and the ``z`` that produced it. ``sd_threshold`` can be
        multiplied by sigma to get epsilon.
    """
    mu = np.mean(smoothed_errors)
    sigma = np.std(smoothed_errors)
    n_errors = len(smoothed_errors)

    max_epsilon = 0
    sd_threshold = sd_limit

    for z in np.arange(2.5, sd_limit, 0.5):
        epsilon = mu + (sigma * z)

        below_epsilon = []
        # A set keeps membership tests O(1); the indices are sorted below.
        above_epsilon = set()

        for i, e in enumerate(smoothed_errors):
            if e < epsilon:
                # kept to compute the mean/std decrease used in the score
                below_epsilon.append(e)

            if e > epsilon:
                # above-epsilon values are anomalies; widen by the buffer
                for j in range(error_buffer):
                    if i + j < n_errors:
                        above_epsilon.add(i + j)
                    if i - j >= 0:
                        above_epsilon.add(i - j)

        if not above_epsilon:
            continue

        # generate sequences
        above_epsilon = sorted(above_epsilon)
        groups = [
            list(group) for group in mit.consecutive_groups(above_epsilon)
        ]
        # single-index groups do not count as sequences
        above_sequences = [(g[0], g[-1]) for g in groups if not g[0] == g[-1]]

        mean_perc_decrease = (mu - np.mean(below_epsilon)) / mu
        sd_perc_decrease = (sigma - np.std(below_epsilon)) / sigma
        score = (mean_perc_decrease + sd_perc_decrease) / \
            (len(above_sequences) ** 2 + len(above_epsilon))

        # update the largest score we've seen so far
        if score > max_epsilon:
            sd_threshold = z
            max_epsilon = score

    return max_epsilon, sd_threshold
def straight(nums):
    """Return the last value of the first run of five or more consecutive numbers.

    Duplicates are dropped (first occurrence kept) before looking for runs.
    Runs are only recognised in ascending order, so ``nums`` is expected to
    be sorted ascending.

    Args:
        nums: iterable of hashable numbers.

    Returns:
        The last (highest) member of the first qualifying run, or 0 when
        there is none.
    """
    no_dups_sf = list(dict.fromkeys(nums))
    if len(no_dups_sf) < 5:
        # Fewer than five distinct values can never form a straight.
        return 0

    for group in mit.consecutive_groups(no_dups_sf):
        run = list(group)
        if len(run) >= 5:
            return run[-1]

    # The original guarded this with ``if TypeError:``, which is always true.
    return 0
def find_ranges(iterable):
    """Yield ``(first - 1, last)`` for every run of consecutive numbers.

    A run with a single number ``n`` therefore yields ``(n - 1, n)``.
    """
    import more_itertools as mit

    for run in map(list, mit.consecutive_groups(iterable)):
        yield run[0] - 1, run[-1]
def find_ranges(iterable):
    """Yield scaled ``(start, end)`` bounds for each run of consecutive numbers.

    ``start`` is ``int(1000 * first)`` and ``end`` is
    ``int(1000 + 1000 * last)``; a run with a single number uses that number
    as both first and last.
    """
    for run in map(list, mit.consecutive_groups(iterable)):
        first, last = run[0], run[-1]
        yield int(1000 * first), int(1000 + 1000 * last)
def straight(nums):
    """Return 5 when ``nums`` contains a run of five or more consecutive numbers.

    Duplicates are dropped (first occurrence kept) before looking for runs.
    Runs are only recognised in ascending order, so ``nums`` is expected to
    be sorted ascending.

    Args:
        nums: iterable of hashable numbers.

    Returns:
        int: 5 if any run has at least five members, otherwise 0 (including
        for empty input).
    """
    no_dups = list(dict.fromkeys(nums))
    if len(no_dups) < 5:
        # Fewer than five distinct values can never form a straight.
        return 0

    # Inspect every run; the original returned after looking at the first.
    has_straight = any(
        len(list(group)) >= 5 for group in mit.consecutive_groups(no_dups)
    )
    return 5 if has_straight else 0
def continuous_insertion_deletion_indel(file2_op, line1, line3, line1_start, line3_start, empty_index_iter1, protein_seq):
    """Classify one aligned gap block and append a record to ``file2_op``.

    The block is an "insertion" when ``line3`` consists only of dashes (one
    per entry of ``empty_index_iter1``), a "deletion" when ``line1`` does,
    and otherwise left unclassified (empty status and detail; only debug
    output is printed).

    Args:
        file2_op (list): output rows; mutated in place and returned.
        line1 (str): first aligned fragment.
        line3 (str): second aligned fragment.
        line1_start (int): offset added to gap indices for ``line1``.
        line3_start (int): offset added to gap indices for ``line3``.
        empty_index_iter1: non-empty sequence of gap indices.
        protein_seq: sequence sliced to obtain the residues replaced by an
            insertion.

    Returns:
        list: ``file2_op`` with ``[change, positions, detail, status]``
        appended.
    """
    first_gap = empty_index_iter1[0]
    gap_run = "-" * len(empty_index_iter1)

    change = line3 + " --> " + line1
    positions = str(line3_start + first_gap) + " --> " + str(line1_start + first_gap)
    detail = ""
    status = ""

    if gap_run == line3:
        last_gap = empty_index_iter1[-1]
        print(first_gap, last_gap + 1)
        replaced = protein_seq[line3_start + first_gap:line3_start + last_gap + 1]
        detail = str(line3_start + first_gap) + "," + replaced + "," + line1
        status = "insertion"
    elif gap_run == line1:
        detail = str(line3_start + first_gap) + "," + line3 + ",."
        status = "deletion"
    elif "-" in line1:
        # Mixed block: only diagnostic output, nothing is recorded.
        dash_index = [m.start() for m in re.finditer('-', line1)]
        dash_index = list(map(list, mit.consecutive_groups(dash_index)))
        print(dash_index)
    elif "-" in line3:
        print(line1, line3)
        dash_index = [m.start() for m in re.finditer('-', line3)]
        remaining_index = list(set(range(len(line3))) - set(dash_index))
        remaining_index = list(map(list, mit.consecutive_groups(remaining_index)))
        remaining_index.append(dash_index)
        print(remaining_index)
        print(dash_index)

    file2_op.append([change, positions, detail, status])
    return file2_op
def find_epsilon(e_s, error_buffer, sd_lim=12.0):
    '''Find the anomaly threshold that maximizes function representing tradeoff between a) number of anomalies
    and anomalous ranges and b) the reduction in mean and st dev if anomalous points are removed from errors
    (see https://arxiv.org/pdf/1802.04431.pdf)

    Candidate thresholds are ``mean + z * sd`` for ``z`` from 2.5 up to
    (excluding) ``sd_lim`` in steps of 0.5.

    Args:
        e_s (array): residuals between y_test and y_hat values (smoothes using ewma)
        error_buffer (int): if an anomaly is detected at a point, this is the number of surrounding values
            to add the anomalous range. this promotes grouping of nearby sequences and more intuitive results
        sd_lim (float): The max number of standard deviations above the mean to calculate as part of the
            argmax function

    Returns:
        sd_threshold (float): the calculated anomaly threshold in number of standard deviations above the mean
            (``sd_lim`` if no candidate qualifies); multiply by sd to get epsilon
    '''
    mean = np.mean(e_s)
    sd = np.std(e_s)
    n_errors = len(e_s)

    max_s = 0
    sd_threshold = sd_lim  # default if no winner or too many anomalous ranges

    for z in np.arange(2.5, sd_lim, 0.5):
        epsilon = mean + (sd * z)

        pruned_e_s = []
        # A set keeps membership tests O(1); the indices are sorted below.
        anomalous = set()

        for i, e in enumerate(e_s):
            # pruned_e_s (less than threshold)
            if e < epsilon:
                pruned_e_s.append(e)

            # anomalies (more than threshold), widened by the buffer
            if e > epsilon:
                for j in range(error_buffer):
                    if i + j < n_errors:
                        anomalous.add(i + j)
                    if i - j >= 0:
                        anomalous.add(i - j)

        if not anomalous:
            continue

        # preliminarily group anomalous indices into continuous sequences
        # (# sequences needed for scoring)
        i_anom = sorted(anomalous)
        groups = [list(group) for group in mit.consecutive_groups(i_anom)]
        # remove list of one element, and keep (min_index, max_index) tuples
        E_seq = [(g[0], g[-1]) for g in groups if not g[0] == g[-1]]

        mean_perc_decrease = (mean - np.mean(pruned_e_s)) / mean
        sd_perc_decrease = (sd - np.std(pruned_e_s)) / sd
        s = (mean_perc_decrease + sd_perc_decrease) / (len(E_seq) ** 2 + len(i_anom))

        # sanity checks (rational cases)
        if s >= max_s and len(E_seq) <= 5 and len(i_anom) < (n_errors * 0.5):
            sd_threshold = z
            max_s = s

    return sd_threshold  # multiply by sd to get epsilon
def find_ranges(iterable):
    """Yield each run of consecutive numbers in *iterable*.

    Runs longer than one number come out as ``(first, last)`` tuples; a
    lone number is yielded on its own.
    """
    import more_itertools as mit

    for run in mit.consecutive_groups(iterable):
        members = list(run)
        if len(members) > 1:
            yield members[0], members[-1]
        else:
            yield members[0]
def getContiguousBlocks(iterable):
    """
    Split an iterable of numbers (e.g., range(10)) into its contiguous
    blocks and return them as a list of lists, in input order.
    """
    import more_itertools as mit

    return list(map(list, mit.consecutive_groups(iterable)))
def find_ranges(iterable):
    """Yield ``(first, extent)`` for each run of consecutive numbers.

    For a run with several numbers ``extent`` is the difference between the
    last and the first number; for a lone number it is 1.
    """
    for run in map(list, mit.consecutive_groups(iterable)):
        first = run[0]
        yield first, (1 if len(run) == 1 else run[-1] - first)
def pprint_equiv_atoms(self):
    """Render ``self.equivalent_atoms`` as a compact, space-separated string.

    Runs of three or more consecutive numbers are abbreviated to
    ``first..last``; shorter runs are written out number by number.
    """
    pieces = []
    for run in consecutive_groups(self.equivalent_atoms):
        members = list(run)
        if len(members) < 3:
            pieces.append(" ".join(map(str, members)))
        else:
            pieces.append(str(members[0]) + ".." + str(members[-1]))
    return " ".join(pieces)
def find_ranges(iterable: Iterable[T]) -> Iterable[Tuple[T, int]]:
    """Describes every consecutive run of the input by its start and size.

    :param iterable: Items to scan for consecutive runs.
    :return: Iterable of (first elem, length), one pair per run.
    """
    for run in map(list, more_itertools.consecutive_groups(iterable)):
        head = run[0]
        yield head, len(run)
def extract_uncertain_groups(df, bodypart='nose', uncertainty=0.95):
    """Group the index values whose likelihood is below a threshold.

    Args:
        df: DataFrame in which ``df[bodypart]['likelihood']`` selects the
            likelihood column (presumably two-level columns -- TODO confirm).
        bodypart (str): first-level column key to inspect.
        uncertainty (float): rows with a likelihood strictly below this
            value are selected.

    Returns:
        list: one list of index values per run of consecutive selected
        indices.
    """
    # Only more_itertools is needed; the numpy/pandas/os imports that used
    # to run on every call were never used.
    import more_itertools as mit

    likelihood = df[bodypart]['likelihood']
    uncertain_index = df[likelihood < uncertainty].index.values
    return [list(group) for group in mit.consecutive_groups(uncertain_index)]
def find_ranges(iterable):
    """Yield ``[first, last]`` for each run of consecutive numbers.

    A run with a single number ``n`` is yielded as ``[n, n]``. Empty input
    yields nothing.
    """
    for group in mit.consecutive_groups(iterable):
        run = list(group)
        yield [run[0], run[-1]]
    # No trailing ``return group``: a generator's return value is invisible
    # to normal iteration, and ``group`` is unbound for empty input.
def get_longest_continuous_t(sequence_str):
    """Return the length of the longest uninterrupted stretch of "T".

    Returns 0 when ``sequence_str`` contains no "T".
    """
    # enumerate already produces the positions in ascending order
    t_positions = [pos for pos, base in enumerate(sequence_str, 1) if base == "T"]
    stretch_lengths = (len(list(run)) for run in consecutive_groups(t_positions))
    return max(stretch_lengths, default=0)
def check_periods_together(df, idx):
    """Print how many consecutive groups the rasterized periods form.

    The values come from ``get_periods_rasterized(df, idx)``; nothing is
    returned.
    """
    acData = get_periods_rasterized(df, idx)
    clusters = list(map(list, mit.consecutive_groups(acData)))
    nGroups = len(clusters)
    print(f"Clustering to {nGroups} group(s)")
def compute_sequences(hand):
    """Collect every same-suit run of at least four consecutive ranks.

    Args:
        hand: object whose ``cards`` have ``suit`` and ``rank`` attributes.

    Returns:
        list: one list of ``Card`` objects (ascending rank) per run found.
    """
    found = []
    suits_in_hand = {card.suit for card in hand.cards}
    for suit in suits_in_hand:
        ranks = sorted(card.rank for card in hand.cards if card.suit == suit)
        for run in consecutive_groups(ranks):
            cards = [Card(rank, suit) for rank in run]
            if len(cards) >= 4:
                found.append(cards)
    return found
def build_unicode_range(cp_set):
    """Format code points as a comma-separated list of ``U+`` ranges.

    Consecutive code points collapse into ``U+FIRST-LAST``; isolated ones
    are written as ``U+CODE``. Numbers are upper-case hexadecimal.
    """
    parts = []
    for run in map(list, consecutive_groups(sorted(cp_set))):
        first, last = run[0], run[-1]
        if len(run) == 1:
            parts.append(f"U+{first:X}")
        else:
            parts.append(f"U+{first:X}-{last:X}")
    return ", ".join(parts)
def find_insertions(x, insert_positions: list):
    """Group the occupied insert positions of a sequence into contiguous runs.

    A position counts as occupied when ``x`` holds something other than
    '-' or 'n' there.

    Returns:
        list: one list of positions per run of consecutive occupied
        positions.
    """
    occupied = [pos for pos in insert_positions if x[pos] not in ('-', 'n')]
    return list(map(list, mit.consecutive_groups(occupied)))
def getContiguousBlocks(iterable):
    """
    Return the contiguous blocks of an iterable of numbers
    (e.g., range(10)) as a list of sublists, one sublist per block.
    """
    import more_itertools as mit

    blocks = []
    for run in mit.consecutive_groups(iterable):
        blocks.append(list(run))
    return blocks
def place_ships(self, random):
    """Place every ship listed in ``ship_lengths`` on ``self.board``.

    Args:
        random: when falsy, each ship is delegated to
            ``self.get_ship_placement``; when truthy, each ship is placed
            at a randomly chosen free position.

    Cells equal to -1 are treated as available. Placing a ship multiplies
    the chosen cells by -1 in place.
    """
    for ship in ship_lengths.keys():
        if not random:
            self.get_ship_placement(ship)
        else:
            ship_length = ship_lengths[ship]
            # availability is computed once per ship; the board only
            # changes when the ship is finally placed
            available_rows, available_cols = np.where(self.board == -1)
            ship_placed = False
            # retry with a fresh orientation/line until a fit is found
            while not ship_placed:
                orientation = np.random.choice(['RIGHT', 'DOWN'])
                if orientation == 'RIGHT':
                    row = np.random.randint(0, board_side_length)
                    available_cols_tmp = available_cols[available_rows == row]
                    # runs of free columns long enough to hold the ship
                    clumps = []
                    for clump in consecutive_groups(available_cols_tmp):
                        check = list(clump)
                        if len(check) >= ship_length:
                            clumps.append(check)
                    if len(clumps) == 0:
                        continue
                    else:
                        clump = clumps[np.random.choice(len(clumps))]
                        # random start such that the ship stays in the run
                        col = clump[0] + np.random.choice(
                            range(len(clump) - ship_length + 1))
                        self.board[row, col:col + ship_length] *= -1
                        ship_placed = True
                else:
                    col = np.random.randint(0, board_side_length)
                    available_rows_tmp = available_rows[available_cols == col]
                    # runs of free rows long enough to hold the ship
                    clumps = []
                    for clump in consecutive_groups(available_rows_tmp):
                        check = list(clump)
                        if len(check) >= ship_length:
                            clumps.append(check)
                    if len(clumps) == 0:
                        continue
                    else:
                        clump = clumps[np.random.choice(len(clumps))]
                        # random start such that the ship stays in the run
                        row = clump[0] + np.random.choice(
                            range(len(clump) - ship_length + 1))
                        self.board[row:row + ship_length, col] *= -1
                        ship_placed = True
def fix_points(self, points, eps=1e-4):
    """Nudge repeated neighbouring points apart by multiples of ``eps``.

    Positions whose successor has the same value (``np.diff(points) == 0``)
    are grouped into consecutive runs. Every position in a run is lowered
    by ``(run_length - offset) * eps``, so earlier positions move further.
    A run starting at index 0 is left untouched.

    Args:
        points: indexable sequence of values; modified in place.
        eps (float): spacing unit used for the shift.

    Returns:
        The same ``points`` object.
    """
    repeated = np.nonzero(np.diff(points) == 0)[0].tolist()
    for run in map(list, mit.consecutive_groups(repeated)):
        if run[0] == 0:
            continue
        run_len = len(run)
        for offset, ix in enumerate(run):
            points[ix] -= (run_len - offset) * eps
    return points
def slots_to_ys(slots):
    '''
    Translate the given slots into y-values and return them grouped into
    runs of consecutive values.

    Each slot contributes, relative to its base row
    ``nuids.index(uid) * (cpn + gpn)``, one value per core id in
    ``core_map`` and one value offset by ``cpn`` per gpu id in ``gpu_map``.
    '''
    values = []
    for slot in slots:
        ybase = nuids.index(slot['uid']) * (cpn + gpn)
        values.extend(ybase + c for cslot in slot['core_map'] for c in cslot)
        values.extend(ybase + cpn + g
                      for gslot in slot['gpu_map'] for g in gslot)

    # find continuous ranges of y-values
    return list(map(list, mit.consecutive_groups(values)))