def bayes_net(edge_strength_matrix: np.array, filename: str, image) -> None:
    """
    Draw the naive (column-wise argmax) edge estimate and write it to disk.

    For every column of ``edge_strength_matrix`` the row with the highest
    value is taken as the edge position. The edge is drawn twice: once on a
    copy of the image (archived under ``bayes_net/``) and once on the image
    object itself (written to the fixed name ``output_simple.jpg``).

    :param edge_strength_matrix: 2D numpy array of edge strengths
    :param filename: name of the file we're working with
    :param image: the original image object to put pixels on
    :return: None
    """
    # Text before the first "." of the path, then the last "/"-component.
    stem = filename.split(".")[0].split("/")[-1]

    targets = (
        # Copy kept for our records.
        ("bayes_net/{0}_output_simple.jpg".format(stem), image.copy()),
        # File picked up by the grading system; drawn on the original image.
        ("output_simple.jpg", image),
    )
    for out_path, canvas in targets:
        rendered = draw_edge(
            image=canvas,
            y_coordinates=edge_strength_matrix.argmax(axis=0),
            color=(0, 0, 255),
            thickness=5,
        )
        imageio.imwrite(out_path, rendered)
def get_topics(M: np.array, n: int, id2word: Dict[int, str]) -> Dict[str, List[str]]:
    """
    Return the top ``n`` words of every topic.

    A word (row of ``M``) belongs to the topic (column) in which it has its
    largest value. Within a topic, words are ordered by that value,
    largest first.

    :param M: 2D matrix; it is transposed if needed so that the longer axis
        indexes words and the shorter axis indexes topics
    :param n: number of words to keep per topic
    :param id2word: mapping from row index to word
    :return: dict ``'Topic k'`` -> list of exactly ``n`` strings; topics
        with fewer than ``n`` words are padded with ``'N/A'``
    """
    M = M if M.shape[0] > M.shape[1] else M.T
    # Dominant topic of every word; computed once instead of twice per topic.
    dominant_topic = M.argmax(axis=1)

    topics_paper = {}
    for dim in range(M.shape[1]):
        indices = np.argwhere(dominant_topic == dim).flatten()
        # Sort this topic's words by their weight in the topic, descending.
        order = np.argsort(M[indices, dim], axis=0)[::-1]
        words = [id2word[id_] for id_ in indices[order][:n]]
        # Each word contributes one entry, so pad up to n (the previous
        # n * 2 target produced lists of unequal length across topics).
        words.extend(['N/A'] * (n - len(words)))
        topics_paper[f'Topic {dim + 1}'] = words
    return topics_paper
def fit_sigmoid(time: np.array, flux: np.array) -> list:
    """
    Fit a sigmoid to a light curve with ``least_squares``.

    Parameters
    ----------
    time : array_like
        exploratory variable (time of observation)
    flux : array_like
        response variable (measured flux)

    Returns
    -------
    result : list of float
        best fit parameter values (the ``x`` attribute of the solver result)
    """
    flux = np.asarray(flux)
    peak_idx = flux.argmax()
    trough_idx = flux.argmin()

    # Initial slope guess. NOTE(review): the numerator is a difference of
    # indices, not of flux values -- confirm that this is intended.
    if time[peak_idx] - time[0] > 0:
        slope_guess = (peak_idx - trough_idx) / (time[peak_idx] - time[trough_idx])
    else:
        slope_guess = 1.

    first_flux = flux[0]
    plateau_guess = np.max(flux)

    # The offset guess is only computable when the ratio is defined and
    # differs from one; otherwise fall back to 1.0.
    if first_flux != 0 and plateau_guess / first_flux != 1.:
        offset_guess = np.log(plateau_guess / first_flux - 1.) / slope_guess
    else:
        offset_guess = 1.0

    initial = [slope_guess, offset_guess, plateau_guess]
    return least_squares(errfunc_sigmoid, initial, args=(time, flux)).x
def __calculate_hsl(self, rgb: np.array):
    """
    Convert one RGB triple (0-255 scale) to lightness, saturation and hue.

    Note the order of the returned tuple: ``(luminance, saturation, hue)``,
    with hue in degrees. A grey input (all channels equal) yields
    ``(luminance, 0., 0.)``.
    """
    assert rgb.size == 3, "RGB must have 3 values."

    scaled = rgb / 255
    lowest = scaled.min()
    highest = scaled.max()
    luminance = (lowest + highest) / 2

    # Grey: no chroma, so saturation and hue are both zero.
    if lowest == highest:
        return luminance, 0., 0.

    spread = highest - lowest
    if luminance < .5:
        saturation = spread / (highest + lowest)
    else:
        saturation = spread / (2. - highest - lowest)

    red = scaled[self.RED_INDEX]
    green = scaled[self.GREEN_INDEX]
    blue = scaled[self.BLUE_INDEX]

    # The dominant channel selects which sector of the colour wheel we are in.
    dominant = rgb.argmax()
    if dominant == self.RED_INDEX:
        hue = (green - blue) / spread
    elif dominant == self.GREEN_INDEX:
        hue = 2. + (blue - red) / spread
    elif dominant == self.BLUE_INDEX:
        hue = 4. + (red - green) / spread
    else:
        assert False, "Index of max_arg should be in [RED_INDEX, BLUE_INDEX]"

    # Sector units -> degrees, wrapped into the non-negative range.
    hue *= 60
    if hue < 0:
        hue = hue + 360
    return luminance, saturation, hue
def sparsenet(G: nx.Graph, distance_matrix: np.array, vertex_to_index: dict):
    """
    Lazily build the SparseNet of a graph, one path at a time.

    Args:
        G: a networkx graph
        distance_matrix: a 2d numpy array where distance_matrix[i][j] is the
            distance from node i to node j
        vertex_to_index: a dictionary which maps the names of vertices to
            their row/column index in ``distance_matrix``

    Yields:
        Paths (lists of vertices) of the configuration. The first path is a
        shortest path between the pair of vertices with the largest entry
        in ``distance_matrix``. Every later path joins the point reported
        farthest from the current configuration to the closest point on
        it. Generation stops once such a path has fewer than two vertices.
    """
    index_to_vertex = dict(zip(vertex_to_index.values(), vertex_to_index.keys()))

    # Row/column of the largest distance give the endpoints of the first path.
    far_a, far_b = np.unravel_index(distance_matrix.argmax(),
                                    distance_matrix.shape)
    first_path = nx.shortest_path(G, index_to_vertex[far_a],
                                  index_to_vertex[far_b])
    configuration = [first_path]
    yield first_path

    while True:
        outlier, anchor = point_farthest_from_configuration(
            distance_matrix, configuration, vertex_to_index)
        bridge = nx.shortest_path(G, outlier, anchor)
        if len(bridge) < 2:
            return
        configuration.append(bridge)
        yield bridge
def cross_entropy_error(y: np.array, t: np.array) -> float:
    """
    Mean cross-entropy loss: ``-sum(log(y[i, label_i])) / batch_size``.

    ``y`` holds predicted probabilities, one row per sample (a 1-D vector
    is treated as a batch of one). ``t`` may be either one-hot rows or
    integer class indices. Only the probability assigned to the true class
    of each sample enters the sum; ``1e-7`` is added before the log so a
    zero probability does not give ``-inf``.
    """
    # Promote a single sample to a (1, n) batch.
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # Same number of elements as y means t is one-hot: reduce to indices.
    if t.size == y.size:
        t = t.argmax(axis=1)

    n_samples = y.shape[0]
    true_class_prob = y[np.arange(n_samples), t]
    return -np.sum(np.log(true_class_prob + 1e-7)) / n_samples
def get_performance_metrics(y_true: np.array, y_score: np.array,
                            accuracy_cutoff: float = 0.5):
    """Get current performance metrics.

    Parameters
    ----------
    y_true: np.array
        Ground-truth labels.
    y_score: np.array
        1-D scores for the binary case, otherwise per-class scores with the
        class on the last axis.
    accuracy_cutoff: float
        When to predict positive predictions in binary case.

    Returns
    -------
    tuple
        ``(auc, f1, ap, acc)``. ``auc``, ``f1`` and ``ap`` are 0 unless
        ``y_score`` is 1-D and ``y_true`` contains exactly two distinct
        values.
    """
    # Defaults for every case where the ranking metrics are not computed.
    # Previously these names were unbound when 1-D scores were paired with
    # a y_true that did not contain exactly two classes, so the return
    # statement raised UnboundLocalError.
    auc = f1 = ap = 0

    if len(y_score.shape) == 1:
        y_pred = (y_score > accuracy_cutoff).astype(int)
        if len(np.unique(y_true)) == 2:
            auc = roc_auc_score(y_true, y_score)
            f1 = f1_score(y_true, y_pred)
            ap = average_precision_score(y_true, y_score)
    else:
        y_pred = y_score.argmax(-1)

    acc = accuracy_score(y_true, y_pred)
    return auc, f1, ap, acc
def get_topics(
    M: np.array, n: int, id2word: Dict[int, str], save_dir: str = None
) -> Tuple[Dict[str, List[str]], Dict[str, List[Tuple[str, int, np.array]]]]:
    """
    Return the top words per topic, plus the raw per-topic word records.

    A word (row of ``M``) belongs to the topic (column) in which it has its
    largest value; within a topic, words are ordered by that value,
    largest first.

    :param M: 2D matrix; transposed if needed so that the longer axis
        indexes words and the shorter axis indexes topics
    :param n: number of words to report per topic
    :param id2word: mapping from row index to word
    :param save_dir: if given, all words of every topic are collected and
        ``topics_all`` is pickled to ``<save_dir>/topics_all.p``; otherwise
        collection stops after ``n`` words per topic
    :return: ``(topics_paper, topics_all)``. ``topics_paper`` maps
        ``'Topic k'`` to an alternating list ``[word, '(score)', ...]``,
        padded with ``'N/A'`` up to ``2 * n`` entries when the topic has
        fewer than ``n`` words. ``topics_all`` maps ``'Topic k'`` to
        ``(word, id, embedding)`` tuples.
    """
    M = M if M.shape[0] > M.shape[1] else M.T
    # Dominant topic of every word; computed once instead of twice per topic.
    dominant_topic = M.argmax(axis=1)

    topics_paper = {}
    topics_all = {}
    for dim in range(M.shape[1]):
        key = f'Topic {dim + 1}'
        paper = topics_paper[key] = []
        records = topics_all[key] = []

        indices = np.argwhere(dominant_topic == dim).flatten()
        embeddings = M[indices]
        order = np.argsort(embeddings[:, dim], axis=0)[::-1]
        embeddings_sorted = embeddings[order].astype(dtype='float64')
        indices_sorted = indices[order]

        for counter, (embedding, id_) in enumerate(
                zip(embeddings_sorted, indices_sorted)):
            if counter < n:
                paper.append(id2word[id_])
                paper.append(f'({np.round(embedding.max(), 3)})')
            elif not save_dir:
                # Nothing will be saved, so words past the top n are unused.
                break
            records.append((id2word[id_], id_, embedding))

        # Two entries per word (word + score), hence the 2 * n target.
        if embeddings_sorted.shape[0] < n:
            paper.extend(['N/A'] * (n * 2 - len(paper)))

    if save_dir:
        # Context manager guarantees the handle is flushed and closed; the
        # previous bare open() leaked it.
        with open(os.path.join(save_dir, 'topics_all.p'), 'wb') as handle:
            pickle.dump(topics_all, handle)
    return topics_paper, topics_all
def _argmax2d(xs: np.array) -> Tuple[int, int]:
    """Return the ``(row, column)`` position of the largest entry of a 2D array."""
    assert len(xs.shape) == 2
    # argmax() indexes the flattened (row-major) array; split that flat
    # index into row and column using the row width.
    row, col = divmod(xs.argmax(), xs.shape[1])
    return row, col
def convertRGBToHSVColor(colours : np.array):
    """
    Convert a colour array to a ``[hue, saturation, value]`` uint8 array.

    Chroma is the difference between the largest and the smallest channel.
    Hue and saturation are delegated to ``getHueFromChroma`` and
    ``getSatuationFromChroma``.
    """
    channels = np.float32(colours)
    darkest = colours.argmin()
    brightest = colours.argmax()
    chroma = channels[brightest] - channels[darkest]

    hue = getHueFromChroma(channels, chroma, brightest, darkest)
    # NOTE(review): "value" is taken as the mean of the channels here --
    # confirm this is the intended definition.
    value = np.mean(channels)
    saturation = getSatuationFromChroma(channels, chroma, value)

    hsv = [hue, saturation, value]
    return np.array(hsv, dtype = np.uint8)
def get_action(self, legal_actions: Actions, q_values: np.array) -> Action:
    """
    Pick the legal action whose board location has the highest Q-value.

    Each legal ``(row, col)`` location is mapped to the flat index
    ``row * board_size + col`` into the first row of ``q_values``. The
    location with the largest value is returned together with its entry
    in ``legal_actions``.
    """
    locations = list(legal_actions)
    flat_indices = [row * self.board_size + col for row, col in locations]
    # Restrict the comparison to legal moves only.
    best = q_values[0, flat_indices].argmax()
    location = locations[best]
    return (location, legal_actions[location])
def get_topic_counts_of_vocab(M: np.array) -> Dict:
    """
    Count how many words fall into each topic.

    A word (row) is assigned to the column holding its largest value. The
    matrix is transposed first if needed so that the longer axis indexes
    words. Only topics with at least one word appear in the result, keyed
    ``'Topic k'`` (1-based) in ascending topic order.
    """
    if not M.shape[0] > M.shape[1]:
        M = M.T
    per_topic = Counter(M.argmax(axis=1))

    labelled = {}
    for topic in sorted(per_topic):
        labelled[f'Topic {topic + 1}'] = per_topic[topic]
    return labelled
def cross_entropy(target: np.array, preds: np.array):
    """
    Summed (not averaged) cross-entropy between targets and predictions.

    :param target: 2D array, one row per sample; the true class of a sample
        is the argmax of its row
    :param preds: 2D array of predicted probabilities, same layout
    :return: ``-sum_i log(preds[i, true_class_i] + EPS)``
    """
    EPS = 1e-10  # keeps log() finite when a predicted probability is 0
    true_class = target.argmax(1)  # invariant across the class loop

    res = 0
    for c in range(target.shape[1]):
        # 1 where the sample's true class is c, else 0. The builtin int
        # replaces the `np.int` alias, which no longer exists in NumPy.
        is_class_c = (true_class == c).astype(int)
        log_prob_c = np.log(preds[:, c] + EPS)
        res += np.sum(is_class_c * log_prob_c)
    return -res
def get_most_confused(
        df: pd.DataFrame,
        path_column: str,
        predictions: np.array,
        labels: np.array,
        image_count: int,
        difference_rate: Optional[float] = None,
        plot: Optional[bool] = True,
        random_plot: Optional[bool] = True,
) -> pd.DataFrame:
    """
    Collect (and optionally plot) the samples the model got wrong.

    Without ``difference_rate`` a sample is selected when its predicted
    class differs from its label. With a (non-zero) ``difference_rate`` a
    sample is selected when the predicted class' score exceeds the true
    class' score by at least that amount.

    :param df: DataFrame you use for predictions. Its ``AdoptionSpeed`` and
        ``image`` columns are read.
    :param path_column: Column where image paths are specified.
        NOTE(review): currently unused -- the ``image`` column is read
        directly; confirm.
    :param predictions: Predictions tensor, one row of class scores per sample.
    :param labels: Label tensor (class index per sample).
    :param image_count: number of images you want to plot
    :param difference_rate: required score gap between predicted and real
        class (0-1)
    :param plot: If True images are plotted and nothing is returned,
        otherwise the selected rows are returned as pd.DataFrame.
        Default True
    :param random_plot: If True selects images randomly. Default True
    :return: pd.DataFrame when ``plot`` is falsy
    """
    caption_column = 'pred_label / label / df_label'
    captions = []
    images = []

    predicted_classes = predictions.argmax(axis=1)
    df_labels = df['AdoptionSpeed'].tolist()
    for row, (pred, label, df_label) in enumerate(
            zip(predicted_classes, labels, df_labels)):
        if difference_rate:
            y_hat, y = int(pred), int(label)
            scores = predictions[row]
            if not scores[y_hat] - scores[y] >= difference_rate:
                continue
            predicted = str(round(predictions[row][y_hat], 4))
            real_class = str(round(predictions[row][y], 4))
            caption = f'{y_hat}: {predicted} / {y}: {real_class} / {df_label}'
        else:
            if not pred != label:
                continue
            caption = f'{pred} / {label} / {df_label}'
        captions.append(caption)
        images.append(df.iloc[row].image)

    confused_df = pd.DataFrame({caption_column: captions, 'image': images})
    if not plot:
        return confused_df

    print(
        f'From {len(df)} images of tested dataframe {len(confused_df)} images were predicted incorrectly with the difference_rate = {difference_rate}.\n'
    )
    plot_df_images(confused_df,
                   'image',
                   image_count,
                   caption_column,
                   random_plot=random_plot)
def find_first(arr: np.array) -> int:
    """
    Locate the first True in a vector.

    Returns the index of the first True element, or None when the vector
    is empty or contains no True at all.
    """
    if len(arr) == 0:
        return None

    position = arr.argmax()
    # argmax() also reports 0 for an all-False vector, so a result of 0 is
    # only genuine when element 0 itself is True.
    nothing_found = position == 0 and not arr[0]
    return None if nothing_found else position
def cross_entropy_error(y: np.array, t: np.array) -> np.array:
    """
    Mean cross-entropy of predictions ``y`` against targets ``t``.

    ``y`` is a batch of probability rows (a 1-D vector counts as a batch
    of one). ``t`` is either one-hot encoded or a vector of class indices.
    """
    single_sample = y.ndim == 1
    if single_sample:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # If the training labels are one-hot vectors, convert them to the
    # index of the correct class.
    labels_are_one_hot = t.size == y.size
    if labels_are_one_hot:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    rows = np.arange(batch_size)
    # 1e-7 guards against log(0).
    log_likelihood = np.log(y[rows, t] + 1e-7)
    return -np.sum(log_likelihood) / batch_size
def ProbToPred(matrix_prob: np.array, method: str, threhold=0.75):
    """
    Turn a probability matrix into 0/1 predictions.

    :param matrix_prob: 2D array, one row of probabilities per sample
    :param method: ``'max'`` marks only the largest entry of each row;
        ``'threhold'`` marks every entry that is >= ``threhold``
    :param threhold: cut-off used by the ``'threhold'`` method
    :return: array of the same shape and dtype as ``matrix_prob``
    :raises Exception: for any other ``method``
    """
    if method == 'max':
        winners = matrix_prob.argmax(1)
        one_hot = np.zeros_like(matrix_prob)
        one_hot[np.arange(len(matrix_prob)), winners] = 1
        return one_hot

    if method == 'threhold':
        binarized = np.copy(matrix_prob)
        binarized[binarized >= threhold] = 1
        # Evaluated after the assignment above, exactly as before.
        binarized[binarized < threhold] = 0
        return binarized

    raise Exception('unknow method, method must be max or threhold')
def __run_circuit(self, registers: np.array, mem: np.array, gates: np.array,
                  controller_coefficients: np.array,
                  debug: DebugTimestep) -> (np.ndarray, np.ndarray):
    """
    Run every gate once, then update the registers, recording debug info.

    Gates are paired in order with the leading entries of
    ``controller_coefficients``; the entries after the first ``len(gates)``
    are used for the register updates. The argmax of the coefficients,
    gate arguments, gate outputs, registers and memory rows is stored on
    ``debug`` along the way.

    :return: ``(new_registers, mem)``, where ``new_registers`` are the first
        ``self.context.num_regs`` rows of the accumulated gate inputs
    """
    # Initially, only the registers may be used as inputs.
    gate_inputs = registers
    # Debug purpose, dictionary for gates and regs history
    debug_step_gates = dict()
    debug_step_regs = dict()
    debug_previous_mod_regs = dict()
    # Memory snapshot taken before any gate has run.
    debug.mem_previous_mod = mem.argmax(axis=1)
    # Run through all the gates.
    # NOTE(review): `ptr` is never read in this method.
    ptr = 0
    for i, (gate, coeffs) in enumerate(zip(gates, controller_coefficients)):
        output, mem, args = self.__run_gate(gate_inputs, mem, gate, coeffs)
        gate_info = dict()
        # NOTE(review): this inner `i` shadows the outer loop index; the
        # outer value is not used afterwards, so it is harmless here.
        for i in range(gate.arity):
            gate_info[str(i)] = [coeffs[i].argmax(), args[i].argmax()]
        gate_info["res"] = output.argmax()
        debug_step_gates[gate.__str__()] = gate_info
        # Append the output of the gate as an input for future gates.
        gate_inputs = np.concatenate([gate_inputs, output])
    debug.gates = debug_step_gates
    # Memory snapshot after all gates have run.
    debug.mem = mem.argmax(axis=1)
    # All leftover coefficients are for registers.
    for i, coeff in enumerate(controller_coefficients[len(gates):]):
        # Record the register before and after it is overwritten.
        debug_previous_mod_regs[str(i)] = [
            coeff.argmax(), gate_inputs[i].argmax()
        ]
        # Written in place. If `gates` is empty, gate_inputs is still the
        # caller's `registers` array, so that array is modified.
        gate_inputs[i] = self.avg(gate_inputs, coeff)
        debug_step_regs[str(i)] = [coeff.argmax(), gate_inputs[i].argmax()]
    debug.regs_previous_mod = debug_previous_mod_regs
    debug.regs = debug_step_regs
    return gate_inputs[np.arange(self.context.num_regs)], mem
def get_score_str(scores: np.array):
    """
    Render scores as fixed-width columns with the best one highlighted.

    Every score is rounded to 5 decimals and left-aligned in a 10-character
    field. The highest score is prefixed with ``G`` and all others with
    ``RE``; every field is followed by ``RE``.
    """
    best = scores.argmax()
    cells = (
        f'{G if position == best else RE}{round(float(score), 5):<10}{RE}'
        for position, score in enumerate(scores.tolist())
    )
    return ''.join(cells)
def _transform_arrays(
        y_true: np.array,
        y_pred: np.array,
        multi_label: bool,
        binary: bool,
        binary_cutoffs: List[float] = None) -> (np.array, np.array):
    """
    Bring labels and predictions into hard 0/1 form for metric computation.

    * ``binary``: both arrays are flattened to ``(n_samples,)`` and the
      predictions are thresholded at ``binary_cutoffs``.
    * ``multi_label``: predictions are thresholded element-wise at
      ``binary_cutoffs``; labels are returned unchanged.
    * otherwise (multiclass): each array with more than one column is
      replaced by a one-hot encoding of its row-wise argmax; arrays that
      are 1-D or have a single column are returned unchanged.

    When ``binary_cutoffs`` is None it is obtained from
    ``compute_binary_cutoffs(y_true, y_pred)``.

    :return: ``(y_true_transformed, y_pred_transformed)``
    :raises AssertionError: if the shapes of the two arrays do not conform
    """

    def one_hot_of_argmax(arr):
        # Only arrays with several columns need converting. Anything else
        # is passed through; previously the result was left unbound in
        # that case, so the return statement raised UnboundLocalError.
        if arr.ndim > 1 and arr.shape[1] > 1:
            encoded = np.zeros_like(arr)
            encoded[range(len(arr)), arr.argmax(1)] = 1
            return encoded
        return arr

    if binary:
        if len(y_pred.shape) > 1:
            y_pred = np.reshape(y_pred, -1)
        if len(y_true.shape) > 1:
            y_true = np.reshape(y_true, -1)
        assert (
            y_true.shape == y_pred.shape and len(y_true.shape) == 1
        ), f'Shapes of predictions and labels for binary classification should conform to (n_samples,) but received {y_pred.shape} and {y_true.shape}.'
        if binary_cutoffs is None:
            binary_cutoffs = compute_binary_cutoffs(y_true, y_pred)
        y_pred_transformed = np.where(y_pred > binary_cutoffs, 1, 0)
        y_true_transformed = y_true
    elif multi_label:
        assert (
            y_true.shape == y_pred.shape
        ), f'Shapes of predictions and labels for multilabel classification should conform to (n_samples, n_classes) but received {y_pred.shape} and {y_true.shape}.'
        if binary_cutoffs is None:
            binary_cutoffs = compute_binary_cutoffs(y_true, y_pred)
        y_pred_transformed = np.where(y_pred > binary_cutoffs, 1, 0)
        y_true_transformed = y_true
    else:
        y_true_transformed = one_hot_of_argmax(y_true)
        y_pred_transformed = one_hot_of_argmax(y_pred)
        assert (
            y_true.shape == y_pred.shape
        ), f'Shapes of predictions and labels for multiclass classification should conform to (n_samples,n_classes) but received {y_pred.shape} and {y_true.shape}.'
    return y_true_transformed, y_pred_transformed
def eval_accuracy(_model: Sequential, data_x: np.array, data_y: np.array) -> np.float64:
    """
    Fraction of predicted keys that match the expected keys after the
    predictions have been shifted back by ``N_STEPS``.

    The predicted class sequence is rolled by ``-N_STEPS`` so that each
    prediction lines up with its expected result before comparing.
    (accuracy is higher after performing the shift, may require a change to
    how training data is given during fitting)

    :param _model: the LSTM model
    :param data_x: the input data to analyze
    :param data_y: the expected key data (one row of class scores per sample)
    :return: the accuracy as a float from 0.0 to 1.0
    """
    raw_prediction = _model.predict(data_x, BATCH_SIZE)
    # Realign predictions with the expected keys.
    predicted_classes = np.roll(raw_prediction.argmax(axis=1), -N_STEPS)
    expected_classes = data_y.argmax(axis=1)

    n_matches = np.equal(predicted_classes, expected_classes).sum()
    n_samples = data_y.shape[0]
    return n_matches / n_samples
def __init__(self, strength: float, blocks_start: np.array,
             blocks_length: np.array, range: tuple = None,
             positive: bool = False):
    """
    Validate the block layout and build the underlying prox object.

    :param strength: passed through to ``_ProxBinarsity`` and stored
    :param blocks_start: first index of each block (list or numpy array);
        must be non-negative and sorted
    :param blocks_length: length of each block (list or numpy array); must
        be strictly positive, and blocks must not overlap
    :param range: forwarded to ``Prox.__init__``; when not None the last
        block must fit inside it
    :param positive: passed through to ``_ProxBinarsity`` and stored
    :raises ValueError: if any of the layout checks below fails
    """
    Prox.__init__(self, range)

    # Sign checks run on the raw input, before the cast to unsigned below.
    if any(length <= 0 for length in blocks_length):
        raise ValueError("all blocks must be of positive size")
    if any(start < 0 for start in blocks_start):
        raise ValueError("all blocks must have positive starting indices")

    if type(blocks_start) is list:
        blocks_start = np.array(blocks_start, dtype=np.uint64)
    if type(blocks_length) is list:
        blocks_length = np.array(blocks_length, dtype=np.uint64)

    # NOTE(review): `dtype is not np.uint64` compares a dtype object with a
    # scalar type by identity, so it looks like this is always true and
    # astype() (which copies) always runs. Switching to `!=` would stop
    # copying uint64 inputs -- confirm that callers do not rely on the copy.
    if blocks_start.dtype is not np.uint64:
        blocks_start = blocks_start.astype(np.uint64)
    if blocks_length.dtype is not np.uint64:
        blocks_length = blocks_length.astype(np.uint64)

    if blocks_start.shape != blocks_length.shape:
        raise ValueError("``blocks_start`` and ``blocks_length`` "
                         "must have the same size")
    if any(blocks_start[1:] < blocks_start[:-1]):
        raise ValueError('``block_start`` must be sorted')
    # Each block must begin at or after the end of the previous one.
    if any(blocks_start[1:] < blocks_start[:-1] + blocks_length[:-1]):
        raise ValueError("blocks must not overlap")

    self.strength = strength
    self.positive = positive
    self.blocks_start = blocks_start
    self.blocks_length = blocks_length

    if range is None:
        self._prox = _ProxBinarsity(strength, blocks_start, blocks_length,
                                    positive)
    else:
        # self.range is presumably set by Prox.__init__ above -- confirm.
        start, end = self.range
        # Block with the largest start index, i.e. the last block.
        i_max = blocks_start.argmax()
        if end - start < blocks_start[i_max] + blocks_length[i_max]:
            raise ValueError("last block is not within the range "
                             "[0, end-start)")
        self._prox = _ProxBinarsity(strength, blocks_start, blocks_length,
                                    start, end, positive)
def get_probs(self, Q_s: np.array, epsilon: float, nA: int):
    """
    Returns the epsilon-greedy weights for sampling next action

    Every action receives ``epsilon / nA``; the action with the highest
    Q estimate additionally receives ``1 - epsilon``.

    Parameters
    ----------
    Q_s : np.array
        Current Q estimate for each action available at current state
    epsilon : float
        epsilon value for epsilon greedy search
    nA : int
        Number of actions available for current state
    """
    weights = np.zeros(nA)
    # Exploration mass, spread evenly over all actions.
    weights += epsilon / nA
    # Remaining mass goes to the greedy action.
    weights[Q_s.argmax()] += 1 - epsilon
    return weights
def classify_prepare(
    self, input: np.array, pred: np.array,
    target: Union[np.array, int], prob: np.array, target_int: int, meta: dict
):
    """
    Build the record describing one classified sample.

    :return: dict with ``y_pred`` (index of the highest probability),
        ``img`` (result of ``self.img``) and ``metric_diff`` (one minus the
        probability assigned to class ``target_int``)
    """
    predicted_class = prob.argmax()
    rendered = self.img(input, pred, target, meta)
    shortfall = 1 - prob[target_int]
    return {
        'y_pred': predicted_class,
        'img': rendered,
        'metric_diff': shortfall,
    }
def calculate_train_accuracy(targets_path: str, probs: np.array) -> float:
    """
    Calculate accuracy on training data.

    The predicted class of a line is the argmax of its probability row.

    Parameters
    ----------
    targets_path : str
        Path to fastText normalized training data.
    probs : np.array
        Class probability distribution for each line the `targets_path`

    Returns
    -------
    accuracy : float
    """
    targets = read_labels(targets_path)
    predicted = probs.argmax(axis=1)

    n_total = len(targets)
    assert n_total == len(predicted)

    n_correct = (targets == predicted).sum()
    return n_correct / n_total
def fit_sigmoid(time: np.array, flux: np.array) -> list:
    """
    Fit a sigmoid to a light curve with ``least_squares``.

    The starting point is ``[1, t0 / 2, max(flux)]``, where ``t0`` is the
    time elapsed between the first observation and the flux maximum.

    Parameters
    ----------
    time : array_like
        exploratory variable (time of observation)
    flux : array_like
        response variable (measured flux)

    Returns
    -------
    output : list of float
        best fit parameter values (the ``x`` attribute of the solver result)
    """
    flux = np.asarray(flux)
    time_to_peak = time[flux.argmax()] - time[0]
    initial = [1, time_to_peak / 2, np.max(flux)]
    return least_squares(errfunc_sigmoid, initial, args=(time, flux)).x
def calculate_metrics_by_thresh_multi(
    y_true: np.array,
    y_prob: np.array,
    metrics: Union[Callable, Sequence[Callable]],
    thresholds: Optional[Sequence] = None,
) -> pd.DataFrame:
    """Calculate multiclass metrics as a function of threshold

    The prediction for a row is the position of the column of `y_prob`
    with the greatest value if that value is strictly greater than the
    threshold, `np.nan` otherwise.

    Parameters:
    - `y_true`: Ground-truth values
    - `y_prob`: Probability distributions
    - `metrics`: Callables that take `y_true`, `y_pred` as positional
        arguments and return a number. Must have a `__name__` attribute.
    - `thresholds`: `Sequence` of `float` threshold values to use. By
        default uses 0 and all values that appear in `y_prob`, which is a
        minimal set that covers all of the relevant possibilities. One
        reason to override that default would be to save time with a large
        dataset.

    Returns: DataFrame with one column "thresh" indicating the thresholds
    used and an additional column for each input metric giving the value
    of that metric at that threshold.
    """

    def predict_above(y_prob, thresh):
        # Most likely class where confident enough, NaN elsewhere.
        confident = y_prob.max(axis=1) > thresh
        return np.where(confident, y_prob.argmax(axis=1), np.nan)

    if thresholds is not None:
        def thresh_picker(y_prob):
            return thresholds
    else:
        def thresh_picker(y_prob):
            return sorted(np.hstack([0, np.unique(y_prob)]))

    return _calculate_metrics_by_thresh(
        y_true=y_true,
        y_prob=y_prob,
        metrics=metrics,
        prob_to_pred=predict_above,
        thresh_picker=thresh_picker,
    )
def step_simulation(self, action: np.array, fixed_steps=1) -> State:
    """
    Perturb the simulator with an arbitrary action.

    The argmax of ``action`` is applied to the environment up to
    ``fixed_steps`` times, stopping early once the environment reports the
    end of the episode. Rewards accumulate in ``self._cum_reward``. The
    state object is then reset and refilled from the last step, and the
    environment is reset if the episode ended.
    """
    done = False
    for _ in range(fixed_steps):
        observed, reward, step_done, info = self.env.step(action.argmax())
        self._cum_reward += reward
        done = done or step_done
        if done:
            break

    inner_env = self.env.unwrapped
    if hasattr(inner_env, "state"):
        microstate = copy.deepcopy(inner_env.state)
    else:
        # State will not be stored. Implement a subclass if you need it.
        microstate = 0

    self.state.reset_state()
    self.state.update_state(
        observed=observed,
        microstate=microstate,
        reward=self._cum_reward,
        end=done,
        policy_action=action,
        model_data=[info])
    if done:
        self.env.reset()
    return self.state
def evaluate_accuracy(x: np.array, y: np.array) -> float:
    """
    Fraction of samples for which predictions ``x`` agree with targets ``y``.

    :param x: predictions, shape ``(n_samples, n_outputs)``
    :param y: targets. With a single output column, agreement means both
        values have the same sign (``> 0``). Otherwise ``y`` is squeezed and
        compared to ``x`` by row-wise argmax.
    :return: accuracy as a Python float
    """
    if x.shape[1] == 1:
        return np.mean((y > 0) == (x > 0)).item()
    # NumPy's argmax takes `axis`; the former `dim=1` keyword raised
    # TypeError for the ndarray inputs this function is annotated with.
    return np.mean(y.squeeze().argmax(axis=1) == x.argmax(axis=1)).item()
def accuracy(y_hat: np.array, y: np.array):
    """
    Fraction of rows of ``y_hat`` whose argmax equals the label in ``y``.

    :param y_hat: 2D array of class scores, one row per sample
    :param y: class index per sample
    """
    predicted = y_hat.argmax(axis=1)
    hits = predicted == y  # type: np.ndarray
    return np.mean(hits)