def similarity_measure_area_of_overlap(arr_fixed: np.array, arr_to_transform: np.array, values: np.array, debug=False):
    '''
    Transforms `arr_to_transform` using affine transformation parameters in
    `values`, then returns the normalised mutual information between the
    result and `arr_fixed`.

    Only the pixels covered by both arrays are considered when calculating
    the mutual information. The transform is called with ``cval=-inf``;
    presumably that marks pixels with no source data, which then fail the
    ``>= arr_to_transform.min()`` test below and are dropped.

    Returns 0 when fewer than three pixels survive the filter, otherwise the
    value of ``im.similarity_measure(..., measure="NMI")``.

    Raises AssertionError if the two input shapes differ or `arr_fixed` has
    fewer than three elements.
    '''
    assert arr_fixed.shape == arr_to_transform.shape

    arr_transformed = transform_using_values(arr_to_transform, values, cval=float('-inf'))
    arr_1 = arr_fixed.ravel()
    arr_2 = arr_transformed.ravel()
    assert len(arr_1) >= 3

    # Loop-invariant: computing this once avoids a full scan per pixel.
    lowest_valid = arr_to_transform.min()
    if debug:
        print("min = " + str(lowest_valid))
        print(values)

    # Boolean mask replaces the element-by-element Python loop.
    overlap = arr_2 >= lowest_valid
    arr_1_reduced = arr_1[overlap]
    arr_2_reduced = arr_2[overlap]
    if debug:
        print("Length = " + str(len(arr_2_reduced)))

    if len(arr_1_reduced) < 3:
        return 0
    return im.similarity_measure(arr_1_reduced, arr_2_reduced, measure="NMI")
def match_dets_and_objs(self, distance_matrix: np.array):
    """Greedily pair detections with tracked objects by ascending distance.

    Rows of `distance_matrix` index detections and columns index objects.
    The smallest remaining distance is taken repeatedly; its row and column
    are then blocked so neither can be matched again. Matching stops once
    the smallest remaining distance is no longer below
    ``self.distance_threshold``.

    A globally optimal assignment was tried previously but produced pairings
    that only existed to reduce the total distance, so the greedy order is
    deliberate.

    Returns a pair of equally long lists ``(det_idxs, obj_idxs)``; both are
    empty when the matrix has no elements. The caller's matrix is not
    modified.
    """
    # NOTE: scanning the whole matrix on every pass is inefficient, but it
    # has not been seen to affect the fps.
    remaining = distance_matrix.copy()
    if remaining.size == 0:
        return [], []

    det_idxs, obj_idxs = [], []
    while remaining.min() < self.distance_threshold:
        # argmin() is an index into the flattened matrix; split into row/col.
        det_idx, obj_idx = divmod(remaining.argmin(), remaining.shape[1])
        det_idxs.append(det_idx)
        obj_idxs.append(obj_idx)
        # Anything above the threshold can never be selected again.
        remaining[det_idx, :] = self.distance_threshold + 1
        remaining[:, obj_idx] = self.distance_threshold + 1
    return det_idxs, obj_idxs
def print_one_set_statistics(x_train: np.array, y_train: np.array) -> None:
    """Print two text tables describing a single data set.

    The first table reports the number of samples (``len(x_train)``), which
    is always shown as 100%. The second lists shape, minimum, maximum and
    Python type for `x_train` and `y_train`.
    """
    def describe(label, arr):
        # One row of the shape table.
        return [label, arr.shape, arr.min(), arr.max(), type(arr)]

    n_samples = len(x_train)
    separator = "//////////////////////////////////////////////////\n"

    print("\n________Table : Data portions info_________")
    portions = Texttable()
    portions.add_rows([
        ['Data Portion', 'Number', 'Percent'],
        ['Total', n_samples, "{:.0%}".format(1)],
    ])
    print(portions.draw())
    print(separator)

    print(
        "_______________________Table : Data shape info________________________"
    )
    shapes = Texttable()
    shapes.add_rows([
        ['Name', 'Shape', 'Min', 'Max', 'Type'],
        describe('x train', x_train),
        describe('y train', y_train),
    ])
    print(shapes.draw())
    print(separator)
def normalize(arr: np.array, lower: float = 0.0, upper: float = 1.0) -> tuple:
    """
    Normalize the input data in a range given by [lower, upper]

    :code:`arrNorm, t = normalize(arr, lower, upper)`

    If ``lower > upper`` the two bounds are swapped. A constant input (zero
    value range) has no meaningful slope; it is mapped to ``lower`` with
    ``t = [0, lower]`` instead of dividing by zero.

    Args:
        arr (np.array): the input data array (left unmodified)
        lower (float): the minimum value of the new range
        upper (float): the maximum value of the new range

    Returns:
        (tuple): tuple containing:
            arrNorm (np.array) : the normalized data
            t (np.array) : the corresponding linear transformation s.t.
                :code:`arrNorm = t[0] * arr + t[1]`
    """
    if lower > upper:
        lower, upper = upper, lower

    arr_min = arr.min()
    span = arr.max() - arr_min
    # Guard the degenerate case; 0 slope keeps arrNorm = t[0] * arr + t[1].
    alpha = (upper - lower) / span if span != 0 else 0.0

    t = np.array([alpha, lower - arr_min * alpha], dtype='float')
    # The arithmetic allocates a new array, so no defensive copy is needed.
    return t[0] * arr + t[1], t
def normalised_image(arr_in: np.array):
    '''
    Rescale an array linearly so its minimum maps to 0 and its maximum to 1.

    A constant array cannot be rescaled; it is returned as an array of the
    same shape filled with that constant clamped into [0, 1].
    '''
    lowest = arr_in.min()
    highest = arr_in.max()
    if highest == lowest:
        clamped = max(0, min(1, highest))
        return np.ones_like(arr_in) * clamped
    return (arr_in - lowest) / (highest - lowest)
def scale_array(array: np.array) -> np.array:
    """
    Min-max scale an array of any dimensionality into [0, 1].

    Raises AssertionError when the array has no value range (max == min).
    Returns a new ``np.float32`` array of the same shape.
    """
    low = array.min()
    span = array.max() - low
    assert span > 0
    scaled = (array - low) / span
    return scaled.astype(np.float32)
def min_max(a: np.array) -> np.array:
    """
    Min-max scale `a`: subtract its minimum and divide by its value range,
    so the smallest element becomes 0 and the largest becomes 1.

    No guard is applied for a constant input (zero range).
    """
    low = a.min()
    span = a.max() - low
    return (a - low) / span
def plot(model, X_test: np.array, y_test: np.array, grid_step=0.02):
    """Draw the decision regions of `model` with the test points on top.

    A grid with spacing `grid_step` is laid over the first two feature
    columns of `X_test` (padded by 1 on each side). ``model.predict`` is
    evaluated on every grid node and the result is shown as a colour mesh;
    the test samples are scattered over it, coloured by `y_test`.

    The grid limits come from feature columns 0 and 1, the same columns that
    are scattered, so the mesh always covers the plotted points.
    """
    cmap_light = ListedColormap(["#FFAAAA", "#AAFFAA"])
    cmap_bold = ListedColormap(["#FF0000", "#00FF00"])

    # calculate min, max and limits per plotted feature axis
    x_min, x_max = X_test[:, 0].min() - 1, X_test[:, 0].max() + 1
    y_min, y_max = X_test[:, 1].min() - 1, X_test[:, 1].max() + 1
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, grid_step), np.arange(y_min, y_max, grid_step)
    )

    # predict the class of every grid node
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
    plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cmap_bold)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("Classification")
    plt.show()
def set_fit_data(self, x_train: np.array):
    """Store the training data and initialise the per-component parameters.

    With ``n = self._n`` components:

    * ``_alpha`` holds ``n`` equal entries of ``1 / n``;
    * ``_mean`` holds ``n`` points spaced evenly from the per-column minimum
      of `x_train` towards (but not reaching) the per-column maximum;
    * ``_var`` holds a vector of ones of length ``_dim`` per component.
    """
    # Set the training data.
    self._x_train = x_train
    self._len_train, self._dim = x_train.shape[0], x_train.shape[1]

    n_components = self._n
    col_min = x_train.min(axis=0)
    col_span = x_train.max(axis=0) - col_min

    self._alpha = np.ones(n_components) / n_components
    self._mean = np.array(
        [k / n_components * col_span + col_min for k in range(n_components)])
    self._var = np.array([np.ones(self._dim) for _ in range(n_components)])
def linear_reg(xi: np.array, y: list):
    """Fit a least-squares line to ``(xi, y)`` and sample it for plotting.

    The fit comes from ``stats.linregress``. The MAPE is computed between
    `y` and the fitted values at `xi`. When `xi` has more than two points
    the fitted line is also sampled from ``xi.min()`` up to ``xi.max()`` in
    steps of 1/30 of the range; otherwise both sample lists are empty.

    Returns:
        ``(line_x, line_y, r_value, mape)``
    """
    slope, intercept, r_value, _p_value, _std_err = stats.linregress(xi, y)
    fitted = slope * xi + intercept
    mape = mean_absolute_percentage_error(y, fitted)

    # Decide on the length check itself: comparing the sampled ndarray with
    # `[]` is an element-wise comparison whose truth value is not defined.
    if len(xi) > 2:
        line_x = np.arange(xi.min(), xi.max(), (xi.max() - xi.min()) / 30)
        line_y = calculate_line(line_x, slope, intercept)
    else:
        line_x, line_y = [], []
    return line_x, line_y, r_value, mape
def prepareimage(img: np.array) -> np.array:
    """Patch the cut row, smooth the image and rescale it to [0, 1].

    Row 5 is replaced by a copy of row 4, the result is passed through a
    Gaussian filter (sigma 1.0), converted to float and min-max normalised.
    """
    # Remove cut: rows 0-4, row 4 again in place of row 5, then rows 6+.
    patched = np.concatenate(
        (img[0:5, :], np.atleast_2d(img[4, :]), img[6:, :]), axis=0)

    # Filter image (before the float conversion, as the filter keeps dtype).
    smoothed = ndimage.gaussian_filter(patched, sigma=1.0)

    # Normalise to [0, 1].
    as_float = np.array(smoothed, dtype=float)
    lowest = as_float.min()
    return (as_float - lowest) / (as_float.max() - lowest)
def prepareimage(img: np.array) -> np.array:
    """Keep the rows after the cut, smooth them and rescale to [0, 1].

    Rows 6 onwards are kept, passed through a Gaussian filter (sigma 1.0),
    converted to float and min-max normalised.
    """
    # Take image after cut.
    below_cut = img[6:, :]

    # Filter image (before the float conversion, as the filter keeps dtype).
    smoothed = ndimage.gaussian_filter(below_cut, sigma=1.0)

    # Normalise to [0, 1].
    as_float = np.array(smoothed, dtype=float)
    lowest = as_float.min()
    return (as_float - lowest) / (as_float.max() - lowest)
def save_chebyshev(self, q: np.array, p: np.array):
    """
    Pre-calculate the Chebyshev polynomials at specified grid points and
    allocate the work arrays sized by those grids.

    Nothing is recomputed when the very same `p` and `q` objects (identity,
    not equality) were supplied on the previous call.

    :param q: 2-D array with a single row or column, values within [-1, 1]
    :param p: 2-D array with a single row or column, values within [-1, 1]
    :return: None
    :raises AssertionError: if `p` or `q` violates the shape or range checks
    """
    # if p and q were supplied before, do not re-calculate
    if self._p is p and self._q is q:
        return

    # consistency checks
    assert len(p.shape) == 2 and (p.shape[0] == 1 or p.shape[1] == 1), "Array p must be flat"
    assert p.min() >= -1. and p.max() <= 1., "Range of p must be [-1, 1]"

    assert len(q.shape) == 2 and (q.shape[0] == 1 or q.shape[1] == 1), "Array q must be flat"
    assert q.min() >= -1. and q.max() <= 1., "Range of q must be [-1, 1]"

    self._p = p
    self._q = q

    self.chebyshev_t_p = get_chebyshev_list(p, np.ones_like(p), p, self.n_basis_vect)
    self.chebyshev_u_p = get_chebyshev_list(p, np.ones_like(p), 2. * p, self.n_basis_vect)

    self.chebyshev_t_q = get_chebyshev_list(q, np.ones_like(q), q, self.n_basis_vect)
    self.chebyshev_u_q = get_chebyshev_list(q, np.ones_like(q), 2. * q, self.n_basis_vect)

    ############################################################################################################
    #
    #   Allocate arrays
    #
    ############################################################################################################

    # NOTE(review): allocation is kept in the recompute path because the
    # array sizes depend on p and q -- confirm against the original layout.
    # The builtin `complex`/`float` replace the `np.complex`/`np.float`
    # aliases, which no longer exist in current NumPy releases.
    self.upsilon1 = np.zeros((p.size, q.size), dtype=complex)
    self.upsilon2 = self.upsilon1.copy()

    self.diff_p_upsilon1 = self.upsilon1.copy()
    self.diff_q_upsilon1 = self.upsilon1.copy()

    self.diff_p_upsilon2 = self.upsilon2.copy()
    self.diff_q_upsilon2 = self.upsilon2.copy()

    self.D11 = self.upsilon1.copy()
    self.D12 = self.D11.copy()
    self.D22 = self.D11.copy()

    self.classical_rho = np.zeros_like(self.D11, dtype=float)
def prepareimage(img: np.array, idx: int) -> np.array:
    """Drop the first `idx` rows and rescale the remainder to [0, 1].

    The rows from `idx` onwards are converted to float and min-max
    normalised. No smoothing is applied in this variant.
    """
    # Remove first frame(s).
    kept = np.array(img[idx:, :], dtype=float)

    # Normalise to [0, 1].
    lowest = kept.min()
    return (kept - lowest) / (kept.max() - lowest)
def normalize_data(
    data_opm_reference: np.array, data_ensembles_flownet: List[np.array]
) -> Tuple[np.array, List[np.array]]:
    """
    Normalize reference-simulation data and FlowNet ensemble data with one
    common linear scaling derived from the reference data only.

    With ``lo``/``hi`` the global minimum/maximum of the reference data, every
    value ``x`` becomes ``(x - lo) * scale`` where ``scale`` is

    * ``1 / (hi - lo)`` in the normal case,
    * ``1 / hi`` when the reference data is (close to) constant and non-zero,
    * ``1`` when the reference data is (close to) zero everywhere.

    Args:
        data_opm_reference: is the 2D numpy array containing data from
            reference simulation replicated into as many columns as the size
            of ensemble of FlowNet realizations
        data_ensembles_flownet: is a list of 2D numpy arrays containing data
            from ensembles of FlowNet simulations; each list entry correspond
            to the ensemble of a given iteration of ES-MDA. May be empty.

    Returns:
        norm_data_opm_reference: is a normalized 2D numpy array for the
            reference simulation data
        norm_data_ensembles_flownet: a list of normalized 2D numpy arrays for
            the ensembles of FlowNet simulations (empty if none were given)
    """
    # Reference columns first, then every ensemble in order. A single
    # concatenate also copes with an empty ensemble list.
    matrix_data = np.concatenate(
        [data_opm_reference, *data_ensembles_flownet], axis=1)
    n_rows = matrix_data.shape[0]

    ref_min = data_opm_reference.min()
    ref_max = data_opm_reference.max()
    if np.isclose(ref_max, ref_min):
        # No usable range: fall back to the magnitude, or to 1 for all-zero.
        denominator = 1.0 if np.isclose(ref_max, 0.0) else ref_max
    else:
        denominator = ref_max - ref_min

    scale = 1 / np.full(n_rows, denominator)
    offset = np.full(n_rows, ref_min) * scale
    norm_matrix_data = matrix_data * scale[:, None] - offset[:, None]

    # All blocks are assumed to have the same number of columns.
    n_blocks = len(data_ensembles_flownet) + 1
    n_data = norm_matrix_data.shape[1] // n_blocks

    norm_data_opm_reference = norm_matrix_data[:, :n_data]
    norm_data_ensembles_flownet = [
        norm_matrix_data[:, k * n_data:(k + 1) * n_data]
        for k in range(1, n_blocks)
    ]
    return norm_data_opm_reference, norm_data_ensembles_flownet
def array_int_replace(a: np.array, replace: Dict[int, int]) -> np.array:
    """Replace values in `a` using mapping in `replace`.

    Only the values that actually occur in `a` need a mapping; integers that
    merely lie between ``a.min()`` and ``a.max()`` do not. Targets may be any
    integer, including negative ones.

    Background on index-based replacement:
    https://stackoverflow.com/questions/46868855

    Args:
        a: integer array of any shape.
        replace: mapping from each value occurring in `a` to its replacement.

    Returns:
        A new integer array with the shape of `a`.

    Raises:
        ValueError: if some value in `a` has no entry in `replace`.
    """
    if a.size == 0:
        return np.empty(a.shape, dtype=int)

    # `inverse` gives, per element, the position of its value in `present`.
    present, inverse = np.unique(a.ravel(), return_inverse=True)
    present = present.tolist()
    if any(value not in replace for value in present):
        raise ValueError('replace dict must contain mappings for all values '
                         'in a.')

    mapped = np.array([replace[value] for value in present])
    return mapped[inverse].reshape(a.shape)
def calculate_for_continuous(data_train: np.array, data_valid: np.array) -> Real:
    """Return ``stats.entropy`` between the KDEs of two continuous samples.

    A Gaussian kernel density estimate is fitted to each sample. Both
    densities are evaluated on 1000 evenly spaced points spanning the
    combined range of the samples, offset by 1e-10 so neither is exactly
    zero, and passed to ``stats.entropy`` (train first, valid second).
    """
    kde_train = stats.gaussian_kde(data_train)
    kde_valid = stats.gaussian_kde(data_valid)

    lowest = min(data_train.min(), data_valid.min())
    highest = max(data_train.max(), data_valid.max())
    grid = np.linspace(start=lowest, stop=highest, num=1000)

    density_train = kde_train(grid) + 1e-10
    density_valid = kde_valid(grid) + 1e-10
    return stats.entropy(density_train, density_valid)
def print_ts_model_stats(
        actuals: np.array, predicted: np.array,
        number_as_percentage: float = 100) -> Tuple[float, float, float]:
    """
    Plot forecast against actuals and print MAE, RMSE and MAPE statistics.

    MAE and RMSE are additionally reported as a percentage of
    `number_as_percentage`; pass the reference quantity there to get the
    errors relative to it. RMSE is also printed normalised by the range and
    by the standard deviation of `actuals`.

    Returns MAE, MAE_as_percentage, and RMSE_as_percentage
    """
    plt.figure(figsize=(15, 8))
    dfplot = pd.DataFrame([predicted, actuals]).T
    dfplot.columns = ['Forecast', 'Actual']
    plt.plot(dfplot)
    plt.legend(['Forecast', 'Actual'])

    mae = mean_absolute_error(actuals, predicted)
    mae_asp = (mae / number_as_percentage) * 100
    rmse_asp = (np.sqrt(mean_squared_error(actuals, predicted)) / number_as_percentage) * 100
    print('MAE (%% AUM) = %0.2f%%' % mae_asp)
    print('RMSE (%% AUM) = %0.2f%%' % rmse_asp)
    print('MAE (as %% Actual) = %0.2f%%' % (mae / abs(actuals).mean() * 100))

    rmse = print_rmse(actuals, predicted)
    mape = print_mape(actuals, predicted)
    print("MAPE = %0.0f%%" % (mape))

    # Normalized RMSE. The precision must be the digit 0; a letter "O" in
    # the format spec raises ValueError.
    print('RMSE = {:,.0f}'.format(rmse))
    print('Normalized RMSE (MinMax) = %0.0f%%' % (100 * rmse / abs(actuals.max() - actuals.min())))
    print('Normalized RMSE = %0.0f%%' % (100 * rmse / actuals.std()))
    return mae, mae_asp, rmse_asp
def get_card(img: Image, coords: np.array) -> Image:
    """Centre, straighten and crop the region described by `coords`.

    The image is shifted so that the mean of the four corner points moves to
    the image centre, rotated by the angle of the edge from the first to the
    second corner, and finally cropped to the bounding box of the
    transformed corners.
    """
    pixels = np.array(img)

    # shift
    centre = coords.sum(axis=0) / 4
    shift_x = pixels.shape[0] / 2 - centre[0]
    shift_y = pixels.shape[1] / 2 - centre[1]
    pixels = shift_image(pixels, shift_x, shift_y)
    coords = shift_coords(coords, shift_x, shift_y)

    # rotate
    first, second = coords[0], coords[1]
    edge_dx = second[0] - first[0]
    edge_dy = second[1] - first[1]
    theta_radians = math.atan2(-edge_dy, edge_dx)
    angle = -theta_radians / math.pi * 180
    pixels = rotate_image(pixels, angle)
    coords = rotate_coords(coords, theta_radians, pixels.shape, radians=True)

    # crop
    lower = coords.min(axis=0).astype(int)
    upper = coords.max(axis=0).astype(int)
    box = (lower.item(0), lower.item(1), upper.item(0), upper.item(1))
    return Image.fromarray(pixels).crop(box)
def log_histogram(self, tag: str, data: np.array, step: int, num_bars: int = 30):
    """
    Adds a histogram to log.

    Parameters
    ----------
    tag: str
    data: np.array
        Array of any shape; it is flattened before binning.
    step: int
    num_bars: int
        The number of bars of the resulting histogram.
    """
    values = data.ravel()
    lowest = values.min()
    highest = values.max()

    if lowest == highest:
        # All values identical: a single bucket holding everything.
        num = 1
        bucket_limit = [lowest]
        bucket = [len(values)]
    else:
        bucket, edges = np.histogram(values, num_bars)
        # NOTE(review): `num` is the number of bin edges here; if
        # HistogramProto expects the count of values instead, confirm.
        num = len(edges)
        # Only the upper edge of each bin is passed on.
        bucket_limit = edges[1:]

    hist = HistogramProto(min=lowest,
                          max=highest,
                          sum=values.sum(),
                          sum_squares=values.dot(values),
                          num=num,
                          bucket_limit=bucket_limit,
                          bucket=bucket)
    self._write_event(tag, step, histo=hist)
def _warn_available_data(test_dates: Union[float, np.array], dates_with_data: np.array) -> None:
    """Raise if given dates don't fall within the range of dates with data

    Dates are handled as POSIX timestamps and reported as UTC in messages.

    Args:
        test_dates: Dates to check for available data
        dates_with_data: Dates with data available

    Raises:
        RuntimeError: if `dates_with_data` is empty.
        ValueError: if any test date lies before the earliest or after the
            latest date with data.
    """
    test_dates = np.atleast_1d(test_dates)  # In case passed a float
    if not len(dates_with_data):
        err_msg = 'No PWV data for primary receiver available on local machine.'
        raise RuntimeError(err_msg)

    # Check date falls within the range of available PWV data
    min_known_date = dates_with_data.min()
    if (test_dates < min_known_date).any():
        min_date = datetime.utcfromtimestamp(float(min_known_date))
        raise ValueError(
            f'No PWV data found for dates before {min_date} on local machine')

    max_known_date = dates_with_data.max()
    if (test_dates > max_known_date).any():
        max_date = datetime.utcfromtimestamp(float(max_known_date))
        raise ValueError(
            f'No PWV data found for dates after {max_date} on local machine')
def character_for_2by2_pixels(square: np.array, color_mode: bool = False) -> str:
    """
    Convert 2x2 matrix (non-negative integers) to unicode character
    representation for plotting.

    Only the cells holding the largest value in `square` count as "set" when
    that value exceeds 1; otherwise every non-zero cell counts. The set cells
    are weighted by ``BINARY_ENCODING_MATRIX`` and summed into an index into
    ``UNICODE_SQUARES``. With `color_mode` enabled, a non-empty character is
    wrapped in the colour code selected by the largest value.
    """
    assert square.shape == (2, 2)
    assert square.min() >= 0

    # Postprocess to remove everything that is not max color
    max_color = square.max()
    if max_color > 1:
        below_max = max_color - 1
        binary_square = np.clip(square, a_min=below_max, a_max=max_color) - below_max
    else:
        binary_square = np.clip(square, a_min=0, a_max=1)

    integer_encoding = (binary_square * BINARY_ENCODING_MATRIX).sum()
    char = UNICODE_SQUARES[integer_encoding]

    if char == "" or not color_mode:
        return char

    palette = list(COLOR_CODES.values())
    color_code = palette[(max_color - 1) % len(COLOR_CODES)]
    return f"{color_code}{char}{COLOR_RESET_CODE}"
def train(self, x: np.array, y: np.array):
    """Remember the training set and derive simple statistics from it.

    Stores `x` and `y` as-is, the number of distinct labels in `y`, and the
    minimum and maximum of `x` along axis 0.
    """
    self.train_x, self.train_y = x, y
    self.n_classes = len(set(y))
    self.mins, self.maxes = x.min(axis=0), x.max(axis=0)
def fit_fcs_in_xy_bin (xybin         : Tuple[int, int],
                       selection_map : Dict[int, List[DataFrame]],
                       event_map     : DataFrame,
                       n_time_bins   : int,
                       time_diffs    : np.array,
                       nbins_z       : int,
                       nbins_e       : int,
                       range_z       : Tuple[float, float],
                       range_e       : Tuple[float, float],
                       energy        : str                 = 'S2e',
                       z             : str                 = 'Z',
                       fit           : FitType             = FitType.profile,
                       n_min         : int                 = 100)->FitParTS:
    """Returns fits in the bin specified by xybin.

    The time series for the bin is built over the full span of `time_diffs`.
    When the bin holds more than `n_min` events the fit is delegated to
    `time_fcs_df`; otherwise a UserWarning is issued and a `FitParTS` whose
    five value arrays are all NaN (one entry per time-series point) is
    returned.
    """
    i, j = xybin[0], xybin[1]
    nevt = event_map[i][j]
    time_range = (time_diffs.min(), time_diffs.max())

    ts, masks = get_time_series_df(n_time_bins, time_range, selection_map[i][j])

    logging.debug(f' ****fit_fcs_in_xy_bin: bins ={i,j}')

    if not nevt > n_min:
        warnings.warn(f'Cannot fit: events in bin[{i}][{j}] ={event_map[i][j]} < {n_min}', UserWarning)
        # One shared NaN array stands in for every fit output.
        dum = np.full(len(ts), np.nan, dtype=float)
        return FitParTS(ts, dum, dum, dum, dum, dum)

    logging.debug(f' events in fit ={nevt}, time series = {ts}')
    return time_fcs_df(ts, masks, selection_map[i][j],
                       nbins_z, nbins_e, range_z, range_e,
                       energy, z, fit)
def normalize_to_correlation_space(array: np.array):
    """Linearly rescale `array` so its minimum maps to -1 and its maximum to 1.

    Works for integer as well as floating-point input; the input array is
    not modified.

    Raises AssertionError if the result does not span exactly [-1, 1], which
    is what happens for a constant input (zero value range).
    """
    out = array - array.min()
    # Not in-place: `/=` cannot store a true-division result in an integer
    # array and raises for integer input.
    out = out / out.max()
    out = out * 2 - 1
    assert out.shape == array.shape and out.min() == -1 and out.max() == 1
    return out
def _find_nearest_slice_index(list_of_values: np.array, val1, val2=False):
    """Locate the index (or index range) nearest to the given coordinate(s).

    With only `val1`, returns the index of the element closest to it. With
    `val2` as well, returns ``slice(low, high)`` between the two nearest
    indices in ascending order, or the single index when both coordinates
    resolve to the same element. ``val2=False`` (the object itself, not just
    a falsy value) means "not supplied".

    Raises AssertionError if `list_of_values` is not one-dimensional and
    ValueError if a coordinate lies outside its min/max range.
    """
    assert list_of_values.ndim == 1

    min_val = list_of_values.min()
    max_val = list_of_values.max()

    if not max_val >= val1 >= min_val:
        raise ValueError(
            f'specified coordinate value ({val1}) is outside the min/max range: [{min_val}, {max_val}]'
        )
    index1 = np.abs(list_of_values - val1).argmin()
    if val2 is False:
        return index1

    if not max_val >= val2 >= min_val:
        raise ValueError(
            f'specified coordinate value ({val2}) is outside the min/max range: [{min_val}, {max_val}]'
        )
    index2 = np.abs(list_of_values - val2).argmin()
    if index1 == index2:
        return index1

    # Tolerate coordinates supplied in descending order.
    return slice(min(index1, index2), max(index1, index2))
def print_ts_model_stats(actuals: np.array, predicted: np.array) -> Tuple[float, float]:
    """
    Plot actuals against forecast and print cross-validation statistics.

    MAE and RMSE are reported as a percentage of the standard deviation of
    `actuals`; RMSE is also reported relative to the range of `actuals`.
    MAPE is printed as well.

    Returns RMSE and RMSE as a percentage of the standard deviation of
    `actuals`.
    """
    number_as_percentage = actuals.std()

    plt.figure(figsize=(15, 8))
    dfplot = pd.DataFrame([actuals, predicted]).T
    dfplot.columns = ['Actual', 'Forecast']
    plt.plot(dfplot)
    plt.legend(['actual', 'forecast'])
    plt.title(
        'Random Forest: Actual vs Forecast in expanding (training) Window Cross Validation',
        fontsize=20)

    print('\n-------------------------------------------')
    print('Model Cross Validation Results:')
    print('-------------------------------------------')
    mae = mean_absolute_error(actuals, predicted)
    mae_asp = (mae / number_as_percentage) * 100
    print('    MAE (as %% Std Dev of Actuals) = %0.2f%%' % mae_asp)

    rmse = print_rmse(actuals, predicted)
    mape = print_mape(actuals, predicted)
    print("    MAPE (Mean Absolute Percent Error) = %0.0f%%" % (mape))

    # Normalized RMSE. The precision must be the digit 0; a letter "O" in
    # the format spec raises ValueError.
    print('RMSE = {:,.0f}'.format(rmse))
    print('    Normalized RMSE (MinMax) = %0.0f%%' % (100 * rmse / abs(actuals.max() - actuals.min())))
    rmse_asp = (np.sqrt(mean_squared_error(actuals, predicted)) / number_as_percentage) * 100
    print('    Normalized RMSE (as Std Dev of Actuals)= %0.0f%%' % rmse_asp)
    return rmse, rmse_asp
def cutout(self, shape: np.array, padding: Tuple[int], background: tuple):
    """Cut the polygon `shape` out of ``self.img`` and store the result.

    The bounding rectangle of `shape` is cropped from the base image; pixels
    outside the polygon are replaced by the colour obtained from
    ``self.get_background(background)``. The cut-out is then rotated via
    ``rotate_img`` and surrounded by a constant border of ``self.background``
    whose widths come from `padding`. ``self.img`` is overwritten.

    :param shape: polygon points, in base-image coordinates
    :param padding: border widths unpacked into ``cv2.copyMakeBorder``
    :param background: passed to ``self.get_background``
    :return: None
    """
    fill_colour = self.get_background(background)

    x, y, w, h = cv2.boundingRect(shape)
    cropped = self.img[y:y + h, x:x + w].copy()

    # Express the polygon relative to the crop's own origin.
    local_pts = shape - shape.min(axis=0)
    mask = np.zeros(cropped.shape[:2], np.uint8)
    cv2.drawContours(mask, [local_pts], -1, (255, 255, 255), -1)

    # Image content inside the polygon, fill colour everywhere else.
    inside = cv2.bitwise_and(cropped, cropped, mask=mask)
    canvas = np.full((inside.shape[0], inside.shape[1], 3), fill_colour, dtype=np.uint8)
    outside = cv2.bitwise_or(canvas, canvas, mask=cv2.bitwise_not(mask))

    final = rotate_img(inside + outside, self.orientation, self.background)
    self.img = final
    self.img = cv2.copyMakeBorder(final, *padding, cv2.BORDER_CONSTANT, value=self.background)
def __to_one_hot(self, labels: np.array, name='') -> np.array:
    '''
    convert input array (labels) to a one-hot encoding, e.g. with
    ``num_classes == 4``
    from: [0, 3, 1, 2]
    to:   [[1, 0, 0, 0],
           [0, 0, 0, 1],
           [0, 1, 0, 0],
           [0, 0, 1, 0]]

    Labels that already pass ``self.check_labels`` are returned untouched.
    Labels starting at 1 are shifted down to start at 0. Every class from 0
    to ``self.num_classes - 1`` has to occur at least once.

    @param labels: input labels (np.array) with shape (?,) or (?, 1); any
                   other multi-dimensional shape is returned unchanged
    @param name: passed through to ``self.check_labels``
    @return: one-hot converted array (np.array, float32)
    @raise Exception: if the label range does not match ``num_classes`` or a
                      class is missing
    '''
    try:
        # check already is one_hot
        self.check_labels(labels, name)
    except Exception:
        # is not one hot...try to convert
        pass
    else:
        return labels

    if len(labels.shape) != 1:
        print(
            f"to_one_hot: (len(labels.shape({labels.shape})) != 1) don't know what to do"
        )
        if labels.shape[1] == 1:
            print('strip 1 dimension and go on with to_one_hot')
            labels = self.strip_1_dims(labels)
        else:
            return labels

    min_label = int(labels.min())
    max_label = int(labels.max())

    # check labels start at 0 and end at num_classes -1, else shift with -1
    if min_label == 1:
        labels = labels - 1
        max_label -= 1
        min_label -= 1

    if min_label != 0:
        raise Exception(
            f'to_one_hot: minimum class should be 0 but is {min_label}'
        )
    if max_label != self.num_classes - 1:
        raise Exception(
            f'to_one_hot: maximum class should be {self.num_classes - 1} but is {max_label}'
        )

    # check all classes are available; the last class is already known to
    # be present because it equals the maximum label checked above
    for _class in range(0, self.num_classes - 1):
        if _class not in labels:
            raise Exception(f'class ({_class}) missing in labels')

    # convert to one hot
    to_one_hot = np.zeros([labels.size, self.num_classes])  # allocate the to_one_hot array
    to_one_hot[np.arange(labels.size), labels.astype(np.int32).ravel()] = 1  # convert labels to one_hot
    return to_one_hot.astype(np.float32)
def qpixmap_from(image: np.array):
    """Convert a 2-D array into a grey-scale ``QPixmap``.

    The data range of `image` is mapped linearly onto 0-255; each value is
    then repeated three times to form RGB888 pixels. An image without any
    value range (constant data) is rendered as all zeros.
    """
    # NOTE(review): the first axis of `image` is passed to QImage as the
    # width and the second as the height -- confirm for non-square images.
    dwidth, dheight = image.shape

    # map the data range to 0 - 255
    # np.ptp() is used because the ndarray.ptp() method no longer exists in
    # NumPy 2.
    value_range = np.ptp(image)
    if value_range == 0:
        # Avoid dividing by zero for constant images.
        img_8bit = np.zeros(image.shape, dtype=np.uint8)
    else:
        img_8bit = ((image - image.min()) / (value_range / 255.0)).astype(np.uint8)

    qimg = QImage(img_8bit.repeat(3), dwidth, dheight, QImage.Format_RGB888)
    return QPixmap(qimg)