def erode_data(data, footprint=None, structure=None):
    if footprint is not None:
        eroded_data = grey_erosion(data, footprint=footprint, structure=structure)
    else:
        eroded_data = grey_erosion(data, size=(10, 10))
    return eroded_data
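# A brief usage sketch of erode_data (synthetic input; the cross-shaped
# footprint is an illustrative choice): a footprint restricts which neighbours
# the minimum is taken over, otherwise a flat 10x10 window is used.
import numpy as np
from scipy.ndimage import grey_erosion

data = np.random.rand(100, 100)
cross = np.array([[0, 1, 0],
                  [1, 1, 1],
                  [0, 1, 0]], dtype=bool)
fine = erode_data(data, footprint=cross)   # erosion over the cross neighbourhood
coarse = erode_data(data)                  # default flat 10x10 minimum filter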
def extract_fundamental(amplitude):
    fundamental = np.zeros(amplitude.shape)
    # TODO: replace all of this with real code or at least clean it up
    # it should just be one big numpy thingy
    f_band_min = -4
    f_band_max = 8
    f_band_len = f_band_max - f_band_min
    f_band_coeffs = (1 - np.concatenate(
        (np.array(range(f_band_min, 0)) / f_band_min,
         np.array(range(f_band_max)) / f_band_max)))[:, np.newaxis]
    peak_finder = np.array([-0.5, -0.5, 2, -0.5, -0.5])[:, np.newaxis].T
    console.time("big loop")
    freqs = np.argmax(np.mean(amplitude[:50], axis=2), axis=0)
    # console.stats(freqs)
    for t in range(amplitude.shape[1]):
        f = freqs[t]
        # handle case where 2nd harmonic > first
        if np.mean(amplitude[f // 2, t]) > 0.4 * np.mean(amplitude[f, t]):
            f = f // 2
            freqs[t] = f
        if f > 5:
            f_min = f + f_band_min
            f_max = f + f_band_max
            fundamental[f_min:f_max, t] = f_band_coeffs * amplitude[f_min:f_max, t]
    console.timeEnd("big loop")
    console.time("remove dots")
    mask = (grey_dilation(
        grey_erosion(fundamental, structure=np.ones((3, 5, 1))),
        structure=np.ones((6, 12, 1))) > 0.1)
    console.timeEnd("remove dots")
    fundamental *= mask
    return fundamental
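# The band coefficients built above form a triangular window: 1 at the
# detected fundamental bin, falling off linearly over [f_band_min, f_band_max).
# A standalone sketch of just that computation:
import numpy as np

f_band_min, f_band_max = -4, 8
coeffs = 1 - np.concatenate((np.arange(f_band_min, 0) / f_band_min,
                             np.arange(f_band_max) / f_band_max))
print(coeffs)   # [0. 0.25 0.5 0.75 1. 0.875 0.75 ... 0.125]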
def get_sibilants(content_amplitude, content_fundamental_amps):
    num_freqs, num_timesteps, _ = content_amplitude.shape
    output = content_amplitude.copy()
    clipped_amps = content_fundamental_amps.copy()
    clipped_amps[clipped_amps < 0.5] = 0
    output -= 2 * clipped_amps[np.newaxis, :, np.newaxis]
    output = np.clip(output, 0, 1)
    # sigh
    output = scipy.ndimage.filters.gaussian_filter1d(output, 4, axis=0, mode="nearest")
    clipped_output = np.clip(output, 0, 1)
    thresh = 0.3
    clipped_output[output > thresh] = 1
    clipped_output[output <= thresh] = 0
    clipped_output = grey_erosion(
        (clipped_output * 255), structure=np.ones((32, 1, 1))) / 255.0
    # TODO: these parts are hacky :(
    clipped_output[:300] = 0
    output *= clipped_output
    output[output > 0.1] *= 4
    output = np.clip(output, 0, 1)
    output = scipy.ndimage.filters.gaussian_filter1d(output, 128, axis=0, mode="nearest")
    output = np.sqrt(output)
    return output
def _grey_erosion(self):
    vol_name = str(self.out_edit.text())
    num = self.size_combo.currentIndex() + 3
    size = (num, num, num)
    mode = self.mode_combo.currentText()
    cval = self.cval_edit.text()
    if not vol_name:
        self.out_edit.setFocus()
        return
    try:
        cval = int(cval)
    except ValueError:
        self.cval_edit.selectAll()
        return
    if cval > 255 or cval < 0:
        print("cval must be 0-255!")
        return
    source_row = self.source_combo.currentIndex()
    source_data = self._model.data(self._model.index(source_row),
                                   Qt.UserRole + 5)
    new_vol = morphology.grey_erosion(source_data, size=size, mode=mode, cval=cval)
    self._model.addItem(new_vol, None, vol_name,
                        self._model._data[0].get_header())
    self.done(0)
def post_process(self):
    """ Generate the metrics from the stored raw ray trace results. """
    data = self.traverse_results
    ## Check raw traverse results for contiguous regions of no ground coverage.
    none_runs = [
        len(list(grp))
        for k, grp in itertools.groupby(data, lambda x: x) if k is None
    ]
    if none_runs and max(none_runs) >= self.contiguous:
        ## Fail because there is a contiguous region larger than allowed
        fwd = "Fail"
        aft = "Fail"
        no_fire_area = "Fail"
    else:
        trav_cen = get_translation(self.weapon.trans_trav)
        angles = np.array([x[0] for x in data if x[1] is not None])
        muz_pts = np.array([x[1][0] for x in data if x[1] is not None])
        gnd_pts = np.array([x[1][1] for x in data if x[1] is not None])
        ## Distance along the ground
        gnd_dist = np.sqrt((trav_cen[0] - gnd_pts[:, 0])**2 +
                           (trav_cen[2] - gnd_pts[:, 2])**2)
        ## Calculate the minimum shoot distance over contiguous regions
        mod_dist = grey_erosion(gnd_dist, self.contiguous, mode="wrap")
        ## Calculate an area metric (A = 0.5 * a.b.sin(c_ang))
        num_tris = len(angles)
        no_fire_area = 0.0
        for i in range(num_tris):
            next_ang = (angles[0] + 2 * pi) if i + 1 >= num_tris else angles[i + 1]
            c_ang = next_ang - angles[i]
            a = gnd_dist[i]
            b = gnd_dist[(i + 1) % num_tris]
            no_fire_area += 0.5 * a * b * sin(c_ang)
        ## Calculate min forward and aft shoot distance.
        fwd = np.max(mod_dist[np.where((angles >= 3 * pi / 2) | (angles <= pi / 2))])
        aft = np.max(mod_dist[np.where((angles <= 3 * pi / 2) & (angles >= pi / 2))])
        self.post_process_2d(angles, gnd_dist, mod_dist, no_fire_area, fwd, aft)
        if self.show_3d:
            self.post_process_3d(muz_pts, gnd_pts)
    ## Write results to json file and echo to log.
    out = {
        "min_fire_dist_fore_180": fwd,
        "min_fire_dist_aft_180": aft,
        "no_fire_area": no_fire_area
    }
    tba.write_results(out)
def jeff_coral_finder(im, sand_intensity_threshold, coral_gradient_threshold,
                      maximum_altseqfilt_radius, shadow_discriminant_threshold,
                      shadow_discriminant_scaling):
    im_grey = N.asarray(im.convert("L"))
    im = N.asarray(im)
    dot = N.array([[0, 1, 0],
                   [1, 1, 1],
                   [0, 1, 0]])
    dilated = morphology.grey_dilation(im_grey, dot.shape, structure=dot)
    eroded = morphology.grey_erosion(im_grey, dot.shape, structure=dot)
    gradient = dilated - eroded
    fisher_discriminant = N.dot(im, shadow_discriminant_scaling)
    # Make initial class determinations.
    is_shadow = fisher_discriminant < shadow_discriminant_threshold
    is_sand = im_grey > sand_intensity_threshold
    is_smooth = gradient < coral_gradient_threshold
    is_coral = is_smooth & ~is_sand & ~is_shadow
    # Now perform an alternating sequence filter on coral.
    for radius in range(1, maximum_altseqfilt_radius + 1):
        se = disk_strel(radius)
        opened = morphology.binary_opening(is_coral, se)
        is_coral = morphology.binary_closing(opened, se)
    # Now perform an alternating sequence filter on sand.
    for radius in range(1, maximum_altseqfilt_radius + 1):
        se = disk_strel(radius)
        opened = morphology.binary_opening(is_sand, se)
        is_sand = morphology.binary_closing(opened, se)
    # Use coral mask to exclude sand.
    is_sand = is_sand & ~is_coral
    return is_sand, is_coral
def prepare_data_fgbg_weigthed(batch_x, batch_y, border_weight=2,
                               separation_border_weight=5, sigma=1):
    # Wrap x in np array and add channel dimension
    out_x = np.array(batch_x)[..., None]  # TODO input with channels?
    # Create the weight map
    struct = morphology.generate_binary_structure(len(batch_y[0].shape), 1)
    foreground = np.zeros_like(batch_y)
    weight_map = np.zeros_like(batch_y, dtype='float32')
    for i, mask in enumerate(batch_y):
        borders = morphology.morphological_laplace(mask, structure=struct) \
            > (np.max(mask) + 1)
        separation_borders = np.logical_and(
            morphology.grey_erosion(mask, structure=struct), borders)
        weight_map[i] = separation_border_weight * separation_borders \
            + border_weight * borders \
            + 1
        # Filter weight map
        if sigma > 0:
            weight_map[i] = filters.gaussian_filter(weight_map[i], sigma=sigma)
        # Foreground is the mask without the borders
        foreground[i] = np.logical_and((mask > 0), np.logical_not(borders))
    background = np.logical_not(foreground)
    out_y = np.array(np.stack([foreground, background], axis=-1), dtype='float32')
    return [out_x, weight_map], out_y
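# A small sketch (toy labels; a flat 4-connected erosion, where the original
# passes the same structure as a non-flat element) of why grey_erosion helps
# isolate the *separation* borders above: a minimum filter over a labelled
# mask vanishes wherever a pixel touches background, but stays positive in
# instance interiors and across instance-instance contacts, so ANDing it with
# a border map keeps only the borders between touching instances.
import numpy as np
from scipy.ndimage import generate_binary_structure, grey_erosion

labels = np.zeros((6, 8), dtype='int32')
labels[1:5, 1:4] = 1    # instance 1
labels[1:5, 4:7] = 2    # instance 2, touching instance 1

struct = generate_binary_structure(2, 1)
eroded = grey_erosion(labels, footprint=struct)
print(eroded)   # zero on the outer boundary, positive where the instances meet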
def erosion(parameters):
    """Erodes a greyscale image.

    For the simple case of a full and flat structuring element, it can be
    viewed as a minimum filter over a sliding window. It wraps
    `scipy.ndimage.morphology.grey_erosion`. The `footprint`, `structure`,
    `output`, `mode`, `cval` and `origin` options are not supported.
    Keep in mind that `mode` and `cval` influence the results. In this case
    the default mode is used, `reflect`.

    :param parameters['data'][0]: input array
    :type parameters['data'][0]: numpy.array
    :param parameters['size']: which neighbours to take into account,
        defaults to (3, 3) a.k.a. numpy.ones((3, 3))
    :type parameters['size']: list
    :return: numpy.array
    """
    data = parameters['data'][0]
    size = tuple(parameters['size'])
    return morphology.grey_erosion(data, size=size)
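# A quick check of the docstring's claim (standard scipy behaviour): with a
# flat, full structuring element, grey erosion equals a sliding-window
# minimum filter; both default to mode="reflect".
import numpy as np
from scipy.ndimage import grey_erosion, minimum_filter

a = np.random.randint(0, 255, size=(16, 16))
assert np.array_equal(grey_erosion(a, size=(3, 3)),
                      minimum_filter(a, size=(3, 3)))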
def _adapt(self, video_idx, frame_idx, last_mask, get_posteriors_fn):
    eroded_mask = grey_erosion(last_mask, size=(self.erosion_size, self.erosion_size, 1))
    dt = distance_transform_edt(numpy.logical_not(eroded_mask))
    adaptation_target = numpy.zeros_like(last_mask)
    adaptation_target[:] = VOID_LABEL
    current_posteriors = get_posteriors_fn()
    positives = current_posteriors[:, :, 1] > self.posterior_positive_threshold
    if self.use_positives:
        adaptation_target[positives] = 1
    threshold = self.distance_negative_threshold
    negatives = dt > threshold
    if self.use_negatives:
        adaptation_target[negatives] = 0
    do_adaptation = eroded_mask.sum() > 0
    if self.debug:
        adaptation_target_visualization = adaptation_target.copy()
        adaptation_target_visualization[adaptation_target == 1] = 128
        if not do_adaptation:
            adaptation_target_visualization[:] = VOID_LABEL
        from scipy.misc import imsave
        folder = self.val_data.video_tag().replace("__", "/")
        imsave("forwarded/" + self.model + "/valid/" + folder +
               "/adaptation_%05d.png" % frame_idx,
               numpy.squeeze(adaptation_target_visualization))
    self.train_data.set_video_idx(video_idx)
    for idx in range(self.n_adaptation_steps):
        do_step = True
        if idx % self.adaptation_interval == 0:
            if do_adaptation:
                feed_dict = self.train_data.feed_dict_for_video_frame(frame_idx, with_annotations=True)
                feed_dict[self.train_data.get_label_placeholder()] = adaptation_target
                loss_scale = self.adaptation_loss_scale
                adaption_frame_idx = frame_idx
            else:
                print("skipping current frame adaptation, since the target seems to be lost", file=log.v4)
                do_step = False
        else:
            # mix in first frame to avoid drift
            # (do this even if we think the target is lost, since then this can help to find back the target)
            feed_dict = self.train_data.feed_dict_for_video_frame(frame_idx=0, with_annotations=True)
            loss_scale = 1.0
            adaption_frame_idx = 0
        if do_step:
            loss, _, n_imgs = self.trainer.train_step(epoch=idx, feed_dict=feed_dict,
                                                      loss_scale=loss_scale,
                                                      learning_rate=self.adaptation_learning_rate)
            assert n_imgs == 1
            print("adapting on frame", adaption_frame_idx, "of sequence", video_idx + 1,
                  self.train_data.video_tag(video_idx), "loss:", loss, file=log.v4)
    if do_adaptation:
        return negatives
    else:
        return None
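# A self-contained sketch of the adaptation-target recipe above, as used for
# online-adaptive video object segmentation: erode the previous mask, then use
# a distance transform from the eroded mask to pick confident negatives. All
# values here (mask, thresholds, VOID label) are illustrative.
import numpy as np
from scipy.ndimage import grey_erosion, distance_transform_edt

VOID = 255
last_mask = np.zeros((10, 10))
last_mask[3:7, 3:7] = 1

eroded = grey_erosion(last_mask, size=(3, 3))
dt = distance_transform_edt(np.logical_not(eroded))

target = np.full_like(last_mask, VOID)
target[eroded > 0] = 1   # confident foreground (the original uses posteriors here)
target[dt > 4] = 0       # confident background, far from the object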
def filtering(img, filter_type="gaussian", filter_value=0.5):
    """
    :Parameters:
    - `img` (openalea.image.SpatialImage) - image
    - `filter_type` (str) - denoising method used for filtering ("gaussian"
      or "asf" for alternate sequential filter). Default is "gaussian".
    - `filter_value` (float for "gaussian" filter or int for alternate
      sequential filter) - value used for the filtering:
        * for a Gaussian filtering, the "filter_value" corresponds to the
          standard deviation.
        * for an Alternate Sequential Filter, the "filter_value" corresponds
          to the number of successive morphological opening and closing
          operations.
    """
    # if not isinstance(img, SpatialImage):
    #     img = SpatialImage(img)
    #
    # if filter_type == 'gaussian':
    #     if not isinstance(filter_value, float):
    #         raise RuntimeError('value used for Gaussian filtering must be a float type')
    #     else:
    #         img = recfilters(img, filter_type="sigma", filter_value=filter_value, Trueyz=(0, 0, 0))
    if filter_type == 'asf':
        if not isinstance(filter_value, int):
            raise RuntimeError(
                'value used for the Alternate Sequential Filter must be an integer type')
        else:
            for rad in range(1, ((filter_value + 1) // 2) + 1):
                print("closing operations with structuring elements of size %s" % rad)
                struct = euclidean_sphere(rad)
                # ~ s = (rad, rad, rad)
                img = grey_dilation(img, footprint=struct)
                img = grey_erosion(img, footprint=struct)
                if filter_value >= rad * 2:
                    print("opening operations with structuring elements of size %s" % rad)
                    img = grey_erosion(img, footprint=struct)
                    img = grey_dilation(img, footprint=struct)
    else:
        raise RuntimeError('filter type not supported')
    return img
def ruined_pic(pic):
    ''' alter the number of pixels in the image:
        increase or decrease character width '''
    val = random.randint(1, 2)
    if val == 1:
        pic = grey_dilation(pic.reshape(64, 64), size=(2, 1))
    else:
        pic = grey_erosion(pic.reshape(64, 64), size=(2, 1))
    return pic
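# A quick usage sketch (assuming a flattened 64x64 glyph, as the reshape above
# implies): dilation thickens strokes and erosion thins them, so ruined_pic
# acts as a cheap stroke-width augmentation.
import random
import numpy as np
from scipy.ndimage import grey_dilation, grey_erosion

glyph = (np.random.rand(64 * 64) > 0.9).astype(np.uint8) * 255
augmented = ruined_pic(glyph)
print(augmented.shape)   # (64, 64)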
def threshold(g, sigma, frac, nbins, erosion_diameter):
    g = background_subtract(g, strel())
    g = gaussian_blur(g, sigma)
    g = np.round(g)
    gthreshold = get_poisson_threshold(g, frac=frac, nbins=nbins)
    gt = g.copy()
    gt[np.where(g < gthreshold)] = 0.0
    gto = morphology.grey_erosion(gt, footprint=strel(diameter=erosion_diameter))
    return gto
def extract_watermark(audio_file, interactive=False):
    """
    Extracts the watermark from the spectrogram of the given audio file
    :param audio_file: path to wav file
    :param interactive: activates plotting
    :return: watermark as text, or None if the watermark could not be extracted
    """
    # Convert audio file to wav if necessary
    wavFile = convert_to_wav(audio_file)
    fs, data = wavfile.read(wavFile)
    data = data.astype(np.float) / np.max(np.abs(data))
    window_length = 1024
    nfft = window_length
    h = window_length // 4
    spectrogram, f, t = stft(data, window_length, h, nfft, fs)
    if interactive:
        plot_spectrogram(spectrogram)
    # Convert to PIL image in order to use optical character recognition
    # Flip upside down due to the usual way in which we view a spectrogram
    ocr_image = np.flipud(np.abs(spectrogram))
    # Do some image enhancement
    ocr_image[ocr_image < 0.2] = 0
    ocr_image = grey_closing(ocr_image, (5, 2))
    ocr_image = grey_erosion(ocr_image, (3, 5))
    # Convert to 8 bit image
    ocr_image = np.uint8(ocr_image / np.max(ocr_image) * 255 * 10)[20:120, :]
    ocr_image[ocr_image > 5] = 255
    # Enlarge image by interpolation
    # ocr_image = imresize(ocr_image, (ocr_image.shape[0] * 8, ocr_image.shape[1] * 8), interp="bilinear")
    if interactive:
        # Show for debugging purposes
        plt.imshow(ocr_image)
        plt.show()
    ocr_image = Image.fromarray(ocr_image)
    ocr_image_filename = "test.png"
    ocr_image.save(ocr_image_filename, format="png")
    # watermark = ocr.tesseract(ocr_image)
    watermark = ocr_space(ocr_image_filename)
    # ocr_image.save("test.png", format="png")
    return watermark
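# A small sketch of the enhancement idea above (synthetic array): grey_closing
# bridges small gaps in bright glyph strokes so OCR sees connected characters,
# and a following grey_erosion trims the speckle the closing leaves behind.
import numpy as np
from scipy.ndimage import grey_closing, grey_erosion

img = np.zeros((20, 20))
img[8, 2:18:2] = 1.0                       # dashed horizontal stroke
bridged = grey_closing(img, size=(1, 3))   # closes the 1-px gaps along x
cleaned = grey_erosion(bridged, size=(1, 2))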
def dist_to_dep(dist_maps, cam_Ks, **kwargs):
    '''
    transform distance maps to depth maps.
    :param dist_maps: distance value maps from camera to points
    :param cam_Ks: camera intrinsics
    :return: depth maps: z values from camera to points.
    '''
    depth_maps = np.ones_like(dist_maps) * np.inf
    view_id = 0
    for dist_map, cam_K in zip(dist_maps, cam_Ks):
        u, v = np.meshgrid(range(dist_map.shape[1]), range(dist_map.shape[0]))
        u = u.reshape([1, -1])[0]
        v = v.reshape([1, -1])[0]
        dist_cam = dist_map[v, u]
        non_inf_indices = np.argwhere(dist_cam < np.inf).T[0]
        dist_cam = dist_cam[non_inf_indices]
        u = u[non_inf_indices]
        v = v[non_inf_indices]
        # calculate coordinates
        x_temp = (u - cam_K[0][2]) / cam_K[0][0]
        y_temp = (v - cam_K[1][2]) / cam_K[1][1]
        z_temp = 1
        z = dist_cam / np.sqrt(x_temp**2 + y_temp**2 + z_temp**2)
        depth_maps[view_id, v, u] = z
        if 'erosion_size' in kwargs:
            # This is mainly a result of experimenting.
            # The core idea is that the volume of the object is enlarged slightly
            # (by subtracting a constant from the depth map).
            # Dilation additionally enlarges thin structures (e.g. for chairs).
            # (refers to Occupancy Network)
            depth_maps[view_id] = grey_erosion(
                depth_maps[view_id],
                size=(kwargs['erosion_size'], kwargs['erosion_size']))
        view_id += 1
    return depth_maps
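# A numeric sanity check of the conversion above (made-up intrinsics): the
# pixel ray is ((u - cx)/fx, (v - cy)/fy, 1), and depth is the measured
# distance divided by the ray's norm.
import numpy as np

fx = fy = 500.0
cx = cy = 320.0
u, v, dist = 420.0, 120.0, 2.5
ray = np.array([(u - cx) / fx, (v - cy) / fy, 1.0])
z = dist / np.linalg.norm(ray)
# a point at depth z along this ray is again `dist` from the camera
assert np.isclose(np.linalg.norm(ray * z), dist)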
def get_correct_flat_and_mask(self, flat_filename, ass_filename, pos_filename):
    """usage: get_correct_flat_and_mask(flat_filename, ass_filename, pos_filename)
    """
    calib_dir = self.main_dir + '/calib/'
    # Reading flat, assign, and position files
    image_flat = MioDataRedProc(calib_dir + flat_filename)
    print('Reading ' + flat_filename + ' completed!')
    assf = mf.ReadASSFile(calib_dir + ass_filename)
    self.arm_assign_list = assf.get_armcoor()
    posf = mf.ReadPOSFile(calib_dir + pos_filename)
    self.fibre_pos_list = posf.get_position()
    # creating two new dictionaries with the same keys as "arm_assign_list"
    self.active_fibre = self.arm_assign_list.keys()
    fibre_mask = self.arm_assign_list.fromkeys(self.active_fibre)    # fibre mask
    correct_flat = self.arm_assign_list.fromkeys(self.active_fibre)
    # calling Bias Regions 6 images. Just need to call once from the same flat or data images.
    print('and obtaining flat bias corners')
    br6_flat = image_flat.BR6Image()
    # correcting flat images and creating an averaged flat template
    for fibre_number in self.active_fibre:
        x, y = self.fibre_pos_list[fibre_number]   # x, y positions of fibre image
        # subtracting the bias level to obtain a corrected flat image
        correct_flatimage = image_flat.FibreImage(x, y) - br6_flat
        average_flat = np.mean(correct_flatimage, axis=0)   # creating an averaged flat
        # normalising flat image
        correct_flat[fibre_number] = average_flat / np.amax(average_flat)
        # creating a fibre mask to remove the non-data edge pixels
        mask_erosion = morphology.grey_erosion(average_flat, size=(1, 1))
        fibre_mask[fibre_number] = mask_erosion > np.median(mask_erosion) - 1.5 * np.std(mask_erosion)
    return correct_flat, fibre_mask
def build_iscat_training(bf_filepaths, iscat_filepaths, sampling=4):
    """Creates iscat training data and target in data/iscat_seg/[REF_FRAMES / MASKS]
    for the iSCAT cell segmentation task

    ARGS:
        bf_filepaths (list(str)): filepaths of all the bright field images to input
            as returned by utilities.load_data_paths()
        iscat_filepaths (list(str)): filepaths of all the iscat images to input
            as returned by utilities.load_data_paths()
        sampling (int): sampling interval of the saved images (lower storage footprint)
    """
    OUT_PATH = DATA_PATH + 'iscat_seg/'
    os.makedirs(os.path.join(OUT_PATH, 'REF_FRAMES/'), exist_ok=True)
    os.makedirs(os.path.join(OUT_PATH, 'MASKS/'), exist_ok=True)

    # Range of non filtered elements [px]
    min_size, max_size = 1, 13

    iscat_stacks = (utilities.load_imgs(path) for path in iscat_filepaths)
    bf_stacks = (utilities.load_imgs(path) for path in bf_filepaths)

    # Returns the metadata of the experiments such as frame rate
    metadatas = get_experiments_metadata(iscat_filepaths)

    if torch.cuda.is_available():
        device = torch.cuda.current_device()
        torch.cuda.set_device(device)
        print("Running on: {:s}".format(torch.cuda.get_device_name(device)))
        cuda = torch.device('cuda')
    else:
        # CPU-only machines are not supported; comment this check out if no GPU
        print("No CUDA device found")
        sys.exit(1)

    unet = UNetCell(1, 1, device=cuda, bilinear_upsampling=False)
    unet.load_state_dict(torch.load('outputs/saved_models/bf_unet.pth'))

    for i, (bf_stack, iscat_stack, metadata) in enumerate(zip(bf_stacks, iscat_stacks, metadatas)):
        if i < 45:
            continue

        bf_stack = bf_stack.astype('float32')
        print(bf_stack.shape)
        if bf_stack.shape[1:] != iscat_stack.shape[1:]:
            bf_stack = processing.coregister(bf_stack, 1.38)
            print(bf_stack.shape)
        normalize(bf_stack)

        # Samples iscat image to correct for the difference in frame rate
        iscat_stack = iscat_stack[::sampling * int(metadata['iscat_fps'] / metadata['tirf_fps'])]

        torch_stack = torch.from_numpy(bf_stack).unsqueeze(1).cuda()
        mask = unet.predict_stack(torch_stack).detach().squeeze().cpu().numpy() > 0.05
        mask = morphology.grey_erosion(mask * 255,
                                       structure=processing.structural_element('circle', (3, 5, 5)))
        mask = morphology.grey_closing(mask,
                                       structure=processing.structural_element('circle', (3, 7, 7)))
        mask = (mask > 50).astype('uint8')

        # Median filtering and normalization
        iscat_stack = processing.image_correction(iscat_stack)

        # Contrast enhancement
        iscat_stack = processing.enhance_contrast(iscat_stack, 'stretching', percentile=(1, 99))

        # Fourier filtering of image
        iscat_stack = processing.fft_filtering(iscat_stack, min_size, max_size, True)
        iscat_stack = processing.enhance_contrast(iscat_stack, 'stretching', percentile=(3, 97))

        for j in range(0, min(iscat_stack.shape[0], mask.shape[0]), sampling):
            if iscat_stack[j].shape == mask[j].shape:
                # Doesn't save images without detected cells
                if mask[j].max() == 0:
                    continue
                print("\rSaving to stack_{}_{}.png".format(i + 1, j + 1), end=' ' * 5)
                tifffile.imsave(os.path.join(OUT_PATH, 'REF_FRAMES/',
                                             "stack_{}_{}.png".format(i + 1, j + 1)),
                                rescale(iscat_stack[j]))
                tifffile.imsave(os.path.join(OUT_PATH, 'MASKS/',
                                             "mask_{}_{}.png".format(i + 1, j + 1)),
                                mask[j] * 255)
            else:
                print("Error, shape: {}, {}".format(iscat_stack[j].shape, mask[j].shape))
                break
    print('')
def _adapt(self, video_idx, frame_idx, last_mask, get_posteriors_fn, kframe_lst):
    eroded_mask = grey_erosion(last_mask, size=(self.erosion_size, self.erosion_size, 1))
    adaptation_target2 = last_mask
    adaptation_target = np.zeros_like(last_mask)
    adaptation_target[:] = VOID_LABEL
    current_posteriors = get_posteriors_fn()
    positives = current_posteriors[:, :, 1] > self.posterior_positive_threshold
    if self.use_positives:
        adaptation_target[positives] = 1
    dt = distance_transform_edt(np.logical_not(eroded_mask))
    threshold = self.distance_negative_threshold
    negatives = dt > threshold
    if self.use_negatives:
        adaptation_target[negatives] = 0
    do_adaptation = eroded_mask.sum() > 0
    print('frame_idx', frame_idx)
    # if self.debug:
    #     adaptation_target_visualization = adaptation_target.copy()
    #     adaptation_target_visualization[adaptation_target == 1] = 128
    #     if not do_adaptation:
    #         adaptation_target_visualization[:] = VOID_LABEL
    #     from scipy.misc import imsave
    #     folder = self.val_data.video_tag().replace("__", "/")
    #     imsave("forwarded/" + self.model + "/valid/" + folder + "/adaptation_%05d.png" % frame_idx,
    #            np.squeeze(adaptation_target_visualization))
    self.train_data.set_video_idx(video_idx)
    threshold_ = 0.05
    # for idx in range(self.n_adaptation_steps):
    for idx in range(frame_idx):
        do_step = True
        # print(kframe_lst)
        if idx % len(kframe_lst) == 0:  # adaptation_interval
            if do_adaptation:
                # print("NewIter")
                # print("idx % self.adaptation_interval == 0", idx % self.adaptation_interval == 0)
                feed_dict = self.train_data.feed_dict_for_video_frame(frame_idx, with_annotations=True)
                feed_dict[self.train_data.get_label_placeholder()] = adaptation_target
                loss_scale = self.adaptation_loss_scale * 5
                adaption_frame_idx = frame_idx
            else:
                print("skipping current frame adaptation, since the target seems to be lost", file=log.v4)
                do_step = False
        elif idx % len(kframe_lst) == 1:
            if len(kframe_lst) == 2:
                feed_dict = self.train_data.feed_dict_for_video_frame(kframe_lst[1], with_annotations=True)
                loss_scale = self.adaptation_loss_scale * 10
                adaption_frame_idx = kframe_lst[1]
            else:
                feed_dict = self.train_data.feed_dict_for_video_frame(kframe_lst[-1], with_annotations=True)
                loss_scale = self.adaptation_loss_scale * 10
                adaption_frame_idx = kframe_lst[-1]
        elif idx % len(kframe_lst) == 2:
            # print("----------------2------------")
            # print("len kframe", len(kframe_lst))
            if len(kframe_lst) > 2:
                feed_dict = self.train_data.feed_dict_for_video_frame(kframe_lst[-2], with_annotations=True)
                loss_scale = self.adaptation_loss_scale * 10
                adaption_frame_idx = kframe_lst[-2]
        else:
            # mix in first frame to avoid drift
            # (do this even if we think the target is lost, since then this can help to find back the target)
            feed_dict = self.train_data.feed_dict_for_video_frame(0, with_annotations=True)
            loss_scale = 1.0
            adaption_frame_idx = 0
        if do_step:
            # self._finetune(video_idx, n_finetune_steps=5)
            loss, measures, n_imgs = self.trainer.train_step(
                epoch=idx, feed_dict=feed_dict, loss_scale=loss_scale,
                learning_rate=self.adaptation_learning_rate)
            # iou = Measures.calc_iou(measures, n_imgs, [0])
            assert n_imgs == 1
            # print("adapting on frame", adaption_frame_idx, "of sequence", video_idx + 1,
            #       self.train_data.video_tag(video_idx), "loss:", loss, "iou:", iou, file=log.v4)
    if do_adaptation:
        return negatives
    else:
        return None
def shadows(horz_data, slp, asp, sza, eff_sza, saa):
    """Calculate self, cast and total shadows from a DEM.

    Args:
        horz_data (ndarray): horizon elevation data computed from the DEM
        slp (ndarray): slope calculated from the DEM
        asp (ndarray): aspect calculated from the DEM
        sza (ndarray): solar zenith angles gridded as horz_data
        eff_sza (ndarray): effective solar zenith angles gridded as horz_data
        saa (ndarray): solar azimuth angles gridded as horz_data

    Returns:
        b (ndarray): combined cast and self shadow map. Binary product.
            (1 = shadow, 0 = no shadow).
        bs (ndarray): self shadows. Binary product.
        bc (ndarray): cast shadows. Binary product."""
    # get number of horizon directions from the horizon file
    N = horz_data.shape[0]
    # Switch horizon data elevation to angle
    horz_data = 90 - horz_data
    # Calculate self shadows (cos gamma is negative)
    bs = np.ones(shape=np.shape(eff_sza))
    # get around error due to NaNs
    eff_sza_nonan = np.copy(eff_sza)
    eff_sza_nonan[np.isnan(eff_sza_nonan)] = 1
    # Self-shadows with relaxed (slightly positive angle) value
    bs[np.cos(eff_sza_nonan) < 0.035] = 0
    # Elementary angle between horizon lines
    dphi = 2 * np.pi / N
    # Find the horizon line surrounding a given azimuth
    nn1 = np.int8(np.floor(saa / dphi))
    nn2 = np.int8(np.ceil(saa / dphi))
    m1 = np.uint32(np.mod(N / 2 - nn1, N) + 1)
    m2 = np.uint32(np.mod(N / 2 - nn2, N) + 1)
    m1prodshape = (np.shape(m1)[0] * np.shape(m1)[1])
    m1L = m1prodshape * (m1.flatten() - 1) + np.uint32(np.arange(1, m1prodshape + 1, 1))
    m2prodshape = (np.shape(m2)[0] * np.shape(m2)[1])
    m2L = m2prodshape * (m2.flatten() - 1) + np.uint32(np.arange(1, m2prodshape + 1, 1))
    # Calculation broken up for clarity
    H1 = np.reshape(horz_data.flatten()[m1L - 1], np.shape(m1))
    H2 = np.reshape(horz_data.flatten()[m2L - 1], np.shape(m2))
    H = np.minimum(H1, H2)
    # Calculate cast shadows
    # In ModImLam the original strict formulation:
    #     "bc[H < solar_zen] = 1"
    # was relaxed to compute a bit larger, following Sirguey et al. 2009,
    # but it overestimates the cast shadows for the Alps
    bc = np.ones(shape=np.shape(H))  # Initialise
    sza_deg = np.rad2deg(sza)
    bc[H < sza_deg] = 0
    # bc[H < sza_deg + (-0.406 * sza_deg)] = 1
    # Use a morphological operation (erode) to clean shadow mask by removing
    # scattered pixels
    bc_fat = morphology.grey_dilation(bc, size=(3, 3))
    bc = morphology.grey_erosion(bc_fat, size=(3, 3))
    # Calculate the combination of cast and self as binary product
    b = np.logical_and(bs, bc).astype(int)
    return (b, bs, bc)
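# A minimal sketch of the clean-up step above: dilating then eroding with the
# same window (a grey-scale closing) removes isolated non-shadow pixels inside
# a shadow mask while keeping the outline of large regions. Synthetic mask.
import numpy as np
from scipy.ndimage import grey_dilation, grey_erosion

mask = np.ones((7, 7))
mask[3, 3] = 0                             # lone speckle
fat = grey_dilation(mask, size=(3, 3))
closed = grey_erosion(fat, size=(3, 3))
assert closed[3, 3] == 1                   # speckle filled in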
def contourcuts(image, maxdist=15, minrange=10, mincdist=20, sigma=1.0, debug=0, r=8, s=0.5):
    if debug:
        figure(1); clf(); imshow(image)
    # start by computing the contours
    contours = image2contours(image != 0)
    # generate a mask for grayscale morphology
    mask = s * ones((r, r))
    mask[2:-2, 2:-2] = 0
    cuts = []
    # now handle each (external) contour individually
    for k, cs in enumerate(contours):
        # compute a matrix of all the pairwise distances of pixels
        # around the contour, then smooth it a little
        ds = distance.cdist(cs, cs)
        ds = filters.gaussian_filter(ds, (sigma, sigma), mode='wrap')
        # compute a circulant matrix telling us the pathlength
        # between any two pixels on the contour
        n = len(cs)
        l = abs(arange(n) - n / 2.0)
        l = l[0] - l
        cds = linalg.circulant(l)
        # find true local minima (exclude ridges) by using the
        # structuring element above
        ge = morphology.grey_erosion(ds, structure=mask, mode='wrap')
        locs = (ds <= ge)
        # restrict it to pairs of points that are closer than maxdist
        locs *= (ds < maxdist)
        # restrict it to pairs of points that are separated by
        # at least mincdist on the contour
        locs *= (cds >= mincdist)
        # label the remaining minima and locate them
        locs, n = measurements.label(locs)
        cms = measurements.center_of_mass(locs, locs, range(1, n + 1))
        # keep only one of each pair (in canonical ordering)
        cms = [(int(i + 0.5), int(j + 0.5)) for i, j in cms if i < j]
        for i, j in cms:
            x0, y0 = cs[i]
            x1, y1 = cs[j]
            # keep only the near vertical ones
            if abs(y1 - y0) > abs(x1 - x0):
                color = 'r'
                cuts.append((cs[i], cs[j]))
            else:
                color = 'b'
            if debug:
                print((x0, y0), (x1, y1))
                figure(1); plot([x0, x1], [y0, y1], color)
    if debug:
        figure(2); clf(); ion(); imshow(locs != 0)
        figure(3); clf(); imshow(minimum(ds, maxdist * 1.5), interpolation='nearest')
        ginput(1, 0.1)
        print("hit ENTER"); input()
    # now construct a cut image
    cutimage = zeros(image.shape)
    for ((x0, y0), (x1, y1)) in cuts:
        image_draw_line(cutimage, y0, x0, y1, x1)
    cutimage = filters.maximum_filter(cutimage, (3, 3))
    if debug:
        figure(4); clf(); imshow(maximum(0, image - 0.5 * cutimage))
    return cutimage
def gray_erosion(self, *args, **kw):
    '''see scipy.ndimage.morphology.grey_erosion'''
    return Image(_morphology.grey_erosion(self, *args, **kw)).convert_type(self.dtype)
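# Hypothetical usage of the wrapper above, assuming `Image` is the library's
# ndarray subclass with a `convert_type` helper: arguments pass straight
# through to scipy, and the result keeps the source image's dtype.
import numpy as np

img = Image(np.random.rand(32, 32))
eroded = img.gray_erosion(size=(3, 3))
assert eroded.dtype == img.dtype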
def calculate_metrics(self):
    assert self.contains_mahalanobis_distances, "Can't calculate ROC without mahalanobis distances calculated"

    # (Name, ROC_AUC, AUC_PR, f1)
    results = list()
    extractor = os.path.basename(self.filename).replace(".h5", "")

    gauss_filters = [None, (0, 1, 1), (0, 2, 2),
                     (1, 0, 0), (1, 1, 1), (1, 2, 2)]
    # gauss_filters = [None, (0,1,1), (0,2,2), (0,3,3),
    #                  (1,0,0), (1,1,1), (1,2,2), (1,3,3),
    #                  (2,0,0), (2,1,1), (2,2,2), (2,3,3)]
    other_filters = [None, "erosion", "dilation"]

    for metric_name, metric in self.METRICS.items():
        relevant = metric.get_relevant(self)
        labels = metric.get_labels(relevant)

        for other_filter in other_filters:
            for gauss_filter in gauss_filters:
                # Don't compute gauss filters (in image space) for per sum metrics (they take the average anyways)
                if metric_name.endswith("(sum)") and gauss_filter is not None and gauss_filter[1] > 0:
                    continue

                title = "Metrics for %s (%s, filter:%s + %s)" % (extractor, metric_name, gauss_filter, other_filter)
                logger.info("Calculating %s" % title)

                scores = dict()
                for n in sorted(self.mahalanobis_distances.dtype.names):
                    name = n.replace("fake", "simple")
                    maha = relevant.mahalanobis_distances[n]
                    if gauss_filter is not None:
                        maha = utils.gaussian_filter(maha, sigma=gauss_filter)
                    if other_filter is not None:
                        struct = generate_binary_structure(2, 1)
                        if struct.ndim == 2:
                            z = np.zeros_like(struct, dtype=np.bool)
                            struct = np.stack((z, struct, z))
                        if other_filter == "erosion":
                            maha = grey_erosion(maha, structure=struct)
                        elif other_filter == "dilation":
                            maha = grey_dilation(maha, structure=struct)
                    scores[name] = metric.get_values(maha)

                filename = os.path.join(consts.METRICS_PATH,
                                        "%s_%s_%s_%s.jpg" % (extractor, metric_name, gauss_filter, other_filter))
                result = self.calculate_roc(title, labels, scores, filename)
                for model, roc_auc, auc_pr, max_f1, fpr0, fpr1, fpr2, fpr3, fpr4, fpr5 in result:
                    results.append((extractor, metric_name, model, gauss_filter, other_filter,
                                    roc_auc, auc_pr, max_f1, fpr0, fpr1, fpr2, fpr3, fpr4, fpr5))

    return results
if exponent != 1:
    print("- Nonlinear Stretching...")
    # Data = ((Data / float(Data.max() / theta))**exponent * 255 / phi).astype(int)
    Data = ((Data / float(Data.max() / theta))**exponent * 255 / phi).astype(int)
    if save:
        pl.save("Stretching", Data)

if opening:
    print("- Morphological Opening...")
    Data = grey_opening(Data, structure=Cross)
    if save:
        pl.save("Opening", Data)

if erosion:
    print("- Morphological Erosion...")
    Data = grey_erosion(Data, structure=Cross)
    if save:
        pl.save("Erosion", Data)

if closing:
    print("- Morphological Closing...")
    Data = grey_closing(Data, structure=Cross)
    if save:
        pl.save("Closing", Data)

# Remark: one could keep on with other transformations, other kernels and so on.
# To do so, I would recommend to use ipython, and eventually load the partial results
FinalStep = Data

if view:
    view_slice(FinalStep, SizeX // 2)
def _adapt(self, video_idx, frame_idx, last_mask, get_posteriors_fn, adapt_flag=0):
    """
    adapt_flag (int): 0: do not adapt,
                      1: adapt with hard labels based on teacher,
                      2: adapt on hard labels from last mask
    """
    # Perform mask erosion to reduce the effect of false positives
    eroded_mask = grey_erosion(last_mask, size=(self.erosion_size, self.erosion_size, 1))
    # Compute distance transform
    dt = distance_transform_edt(numpy.logical_not(eroded_mask))
    # Initialize adaptation target
    adaptation_target = numpy.zeros_like(last_mask)
    adaptation_target[:] = VOID_LABEL
    # Retrieve current probability map to adapt with
    current_posteriors = get_posteriors_fn()
    if adapt_flag == 2:
        positives = current_posteriors[:, :, 1] > self.posterior_positive_threshold
    elif adapt_flag == 1:
        positives = last_mask == 1
    if self.use_positives:
        adaptation_target[positives] = 1
    # Threshold based on distance transform
    threshold = self.distance_negative_threshold
    negatives = dt > threshold
    if self.use_negatives:
        adaptation_target[negatives] = 0
    do_adaptation = eroded_mask.sum() > 0
    # Save adaptation targets for debugging
    if self.debug:
        adaptation_target_visualization = adaptation_target.copy()
        adaptation_target_visualization[adaptation_target == 1] = 128
        if not do_adaptation:
            adaptation_target_visualization[:] = VOID_LABEL
        from scipy.misc import imsave
        folder = self.val_data.video_tag().replace("__", "/")
        imsave("forwarded/" + self.model + "/valid/" + folder + "/adaptation_%05d.png" % frame_idx,
               numpy.squeeze(adaptation_target_visualization))
    self.train_data.set_video_idx(video_idx)
    # Start adapting based on the number of adaptation steps
    for idx in range(self.n_adaptation_steps):
        do_step = True
        # if idx % self.adaptation_interval == 0:
        if do_adaptation:
            feed_dict = self.train_data.feed_dict_for_video_frame(frame_idx, with_annotations=True)
            feed_dict[self.train_data.get_label_placeholder()] = adaptation_target
            loss_scale = self.adaptation_loss_scale
            adaption_frame_idx = frame_idx
        else:
            do_step = False
        if do_step:
            loss, _, n_imgs = self.trainer.train_step(epoch=idx, feed_dict=feed_dict,
                                                      loss_scale=loss_scale,
                                                      learning_rate=self.adaptation_learning_rate)
            assert n_imgs == 1
            print("adapting on frame", adaption_frame_idx, "of sequence", video_idx + 1,
                  self.train_data.video_tag(video_idx), "loss:", loss, file=log.v4)
    if do_adaptation:
        return negatives
    else:
        return None
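# A tiny illustration (synthetic arrays, illustrative threshold) of the two
# positive-label sources selected by adapt_flag above: confident teacher
# posteriors (adapt_flag == 2) versus the previous hard mask (adapt_flag == 1).
import numpy as np

posteriors = np.random.rand(4, 4, 2)             # [:, :, 1] = foreground prob.
last_mask = (np.random.rand(4, 4) > 0.5).astype(int)

positives_teacher = posteriors[:, :, 1] > 0.97   # adapt_flag == 2
positives_lastmask = last_mask == 1              # adapt_flag == 1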
cv2.imwrite(cur_save_root + '00000.png', output)
for j in range(1, cur_seq_range):
    img_file = osp.join(cur_seq_image_path, '%05d' % j + '.jpg')
    cur_img, ori_img_size = load_image_label_davis17(img_file, cfg['crop_size'])
    cur_img = Variable(cur_img).cuda()
    output = model.forward(cur_img, model_weights, train_mode, bn_params)
    output = sigmoid(interp(output)).cpu().data[0, 0].numpy()

    # use current frame to generate label for next frame
    gen_label = np.zeros_like(output)
    gen_label[:] = adapt_ignore_label
    eroded_mask = grey_erosion(last_mask, size=(adapt_erosion_size, adapt_erosion_size))
    eroded_mask[eroded_mask < 0.1] = 0
    dt = distance_transform_edt(np.logical_not(eroded_mask))
    gen_label[output > pos_pred_thres] = 1
    gen_label[dt > neg_dist_thres] = 0

    do_adapt = eroded_mask.sum() > 0
    if adapt_debug:
        gen_label_vis = gen_label.copy()
        gen_label_vis[gen_label == 1] = 128
        if not do_adapt:
            gen_label_vis[:] = adapt_ignore_label
    return keypoints, np.array(descrs)


img1 = cv2.imread("masked_points.jpg", 0)
img2 = cv2.imread("masked_disks.jpg", 0)
# pixel-wise mean taken over an image ensemble
mean1 = cv2.imread("masked_mean1.jpg", 0)
mean2 = cv2.imread("masked_mean2.jpg", 0)

# turn the points into disks
img1 -= mean1
img2 -= mean2
img1 = grey_erosion(img1, size=(3, 3))

# flip and scale the image
img1 = np.fliplr(img1)       # horizontal flip (numpy has no np.hflip)
img1 = imresize(img1, 0.5)   # scale the image to half size

# locate intensity peaks. wherever they are, black them out and replace them
# with a large circle in a blank image
virtualdisks = np.zeros(img2.shape)
max_threshold = 200
virtualradius = 60
eraserradius = 13
while np.max(img1) > max_threshold:
    # row/column of the global intensity peak
    cy, cx = np.unravel_index(np.argmax(img1), img1.shape)
    rr, cc = circle(cy, cx, virtualradius)
def get_data_from_image_id(image_id,
                           coco_obj,
                           img_size,
                           base_path,
                           grayscale=False,
                           generate_negative_points=True,
                           aug_pipeline=None,
                           toggle_bin_mask=True,
                           toggle_joints=True,
                           toggle_dp_seg=True,
                           toggle_dp_points=True,
                           toggle_instance_offsets=True):
    all_results = {}
    res = get_image_data(coco_obj, image_id, base_path,
                         toggle_image=True,
                         toggle_bin_mask=toggle_bin_mask,
                         toggle_joints=toggle_joints,
                         toggle_dp_seg=toggle_dp_seg,
                         toggle_dp_points=toggle_dp_points,
                         toggle_instance_offsets=toggle_instance_offsets)

    # image
    image = res['image']
    xyhw_box = (0, 0, image.shape[0], image.shape[1])  # y, x, h, w
    xyhw_box = bbu.extend_xyhw_to_ratio(xyhw_box, img_size[1] / img_size[0])
    xyhw_box = bbu.round_bbox_params(xyhw_box)
    padded_img = imu.pad_to_bbox(image, xyhw_box, mode='mean')
    new_bbox = (max(0, xyhw_box[0]), max(0, xyhw_box[1]), xyhw_box[2], xyhw_box[3])
    offset_x, offset_y = xyhw_box[0], xyhw_box[1]
    padded_crop = padded_img[new_bbox[0]:new_bbox[0] + new_bbox[2],
                             new_bbox[1]:new_bbox[1] + new_bbox[3]]
    old_shape = padded_crop.shape[:]
    resized_crop = cv2.resize(padded_crop, img_size[::-1], interpolation=cv2.INTER_AREA)
    all_results['image'] = resized_crop

    # keypoints
    if toggle_joints:
        all_keypoints = res['joints'].copy()
        # coordinates in the crop's frame
        all_shifted_pts = all_keypoints - [offset_x, offset_y, 0, 0]
        # remove outsiders
        all_shifted_pts = all_shifted_pts[
            (0 <= all_shifted_pts[:, 0]) & (all_shifted_pts[:, 0] < padded_crop.shape[0]) &
            (0 <= all_shifted_pts[:, 1]) & (all_shifted_pts[:, 1] < padded_crop.shape[1])
        ]
        # rescale keypoints
        all_rescaled_pts = all_shifted_pts * [img_size[0] / old_shape[0],
                                              img_size[1] / old_shape[1], 1., 1.]
        all_results['joints'] = all_rescaled_pts

        # fix joints loss region
        joints_loss_region = res['joints_loss_region']
        # pad with 1. to collect loss from no mask outside the image
        joints_loss_region = imu.pad_to_bbox(joints_loss_region, xyhw_box, mode='constant', cval=1)
        joints_loss_region = joints_loss_region[new_bbox[0]:new_bbox[0] + new_bbox[2],
                                                new_bbox[1]:new_bbox[1] + new_bbox[3]]
        joints_loss_region = cv2.resize(joints_loss_region.astype(np.uint8), img_size[::-1],
                                        interpolation=cv2.INTER_NEAREST).astype(np.float32)
        all_results['joints_loss_region'] = grey_erosion(joints_loss_region, 5)

    # pad, crop and resize bin_mask
    if toggle_bin_mask:
        _bin_mask = res['bin_mask']
        _bin_mask = imu.pad_to_bbox(_bin_mask, xyhw_box, mode='constant')
        _bin_mask = _bin_mask[new_bbox[0]:new_bbox[0] + new_bbox[2],
                              new_bbox[1]:new_bbox[1] + new_bbox[3]]
        bin_mask = cv2.resize(_bin_mask.astype(np.uint8), img_size[::-1],
                              interpolation=cv2.INTER_NEAREST).astype(np.float32)
        all_results['bin_mask'] = bin_mask

    # pad, crop and resize dp_mask
    if toggle_dp_seg:
        _dp_mask = res['dp_mask']
        _dp_mask = imu.pad_to_bbox(_dp_mask, xyhw_box, mode='constant')
        _dp_mask[:, :, 0] = ~np.logical_or.reduce(_dp_mask[:, :, 1:], axis=2)
        _dp_mask = _dp_mask[new_bbox[0]:new_bbox[0] + new_bbox[2],
                            new_bbox[1]:new_bbox[1] + new_bbox[3]]
        dp_mask = resize(_dp_mask.astype(np.float32), img_size, order=0,
                         mode='edge', anti_aliasing=False).astype(np.float32)
        # dp_mask = softmax(dp_mask, axis=2)
        dp_mask /= dp_mask.sum(axis=2)[:, :, None]
        all_results['dp_mask'] = dp_mask

    # rescale densepose points
    if toggle_dp_points:
        _dp_points = res['dp_points'].copy()
        # coordinates in the crop's frame
        _dp_points = _dp_points - [offset_x, offset_y, 0., 0., 0.]
        # remove outsiders
        _dp_points = _dp_points[
            (0 <= _dp_points[:, 0]) & (_dp_points[:, 0] < padded_crop.shape[0]) &
            (0 <= _dp_points[:, 1]) & (_dp_points[:, 1] < padded_crop.shape[1])
        ]
        # rescale keypoints
        dp_points = _dp_points * [img_size[0] / old_shape[0],
                                  img_size[1] / old_shape[1], 1., 1., 1.]
        if generate_negative_points:
            dp_points = np.concatenate(
                [dp_points,
                 generate_neg_coords(bin_mask.astype(np.bool) |
                                     (~dp_mask[:, :, 0].astype(np.bool)))],
                axis=0)
        all_results['dp_points'] = dp_points

    # instance offsets
    if toggle_instance_offsets:
        inst_offsets, head_points = res['instance_offsets'], res['head_points']
        inst_offsets = imu.pad_to_bbox(inst_offsets, xyhw_box, mode='constant')
        inst_offsets = inst_offsets[new_bbox[0]:new_bbox[0] + new_bbox[2],
                                    new_bbox[1]:new_bbox[1] + new_bbox[3]]
        # inst_offsets /= list(inst_offsets.shape[:2])  # normalize offsets
        inst_offsets = cv2.resize(inst_offsets, img_size[::-1], interpolation=cv2.INTER_NEAREST)
        all_results['instance_offsets'] = inst_offsets
        if head_points != []:
            head_points = head_points - [offset_x, offset_y]
            head_points = head_points * [img_size[0] / old_shape[0], img_size[1] / old_shape[1]]
        all_results['head_points'] = head_points

    ################## AUGMENTS HERE ###################
    if aug_pipeline is not None:
        all_results = aug_pipeline(all_results)

    if toggle_instance_offsets:
        pts = all_results['head_points']
        all_results['instance_offsets_loss_region'] = (all_results['instance_offsets'] > 0).astype(np.float32)
        offset_mask = np.zeros((img_size[0], img_size[1], 2), dtype=np.float32)
        if pts != []:
            # convert instance maps into offset maps and loss region
            for i in range(pts.shape[0]):
                head_point = pts[i]
                xx, yy = np.where(all_results['instance_offsets'] == i + 1)
                offset_mask[xx, yy, 0] = head_point[0] - xx
                offset_mask[xx, yy, 1] = head_point[1] - yy
        all_results['instance_offsets'] = offset_mask / [img_size[0], img_size[1]]
        del all_results['head_points']

    return all_results
def _adapt(self, video_idx, frame_idx, last_mask, get_posteriors_fn, kframe_lst):
    eroded_mask = grey_erosion(last_mask, size=(self.erosion_size, self.erosion_size, 1))
    dt = distance_transform_edt(numpy.logical_not(eroded_mask))
    adaptation_target2 = last_mask
    adaptation_target = numpy.zeros_like(last_mask)
    adaptation_target[:] = VOID_LABEL
    current_posteriors = get_posteriors_fn()
    positives = current_posteriors[:, :, 1] > self.posterior_positive_threshold
    if self.use_positives:
        adaptation_target[positives] = 1
    threshold = self.distance_negative_threshold
    negatives = dt > threshold
    if self.use_negatives:
        adaptation_target[negatives] = 0
    do_adaptation = eroded_mask.sum() > 0
    if self.debug:
        adaptation_target_visualization = adaptation_target.copy()
        adaptation_target_visualization[adaptation_target == 1] = 128
        if not do_adaptation:
            adaptation_target_visualization[:] = VOID_LABEL
        from scipy.misc import imsave
        folder = self.val_data.video_tag().replace("__", "/")
        imsave("forwarded/" + self.model + "/valid/" + folder + "/adaptation_%05d.png" % frame_idx,
               numpy.squeeze(adaptation_target_visualization))
    self.train_data.set_video_idx(video_idx)
    threshold_ = 0.020
    # print("self.n_adaptation_steps", self.n_adaptation_steps)
    for idx in range(self.n_adaptation_steps):
        # print(idx)
        do_step = True
        if idx % self.adaptation_interval == 0:  # adaptation_interval
            if do_adaptation:
                print("NewIter")
                # -------------------- pre-frame result --------------------
                feed_dict = self.train_data.feed_dict_for_video_frame(frame_idx, with_annotations=True)
                feed_dict[self.train_data.get_label_placeholder()] = adaptation_target
                loss_scale = self.adaptation_loss_scale * 5
                adaption_frame_idx = frame_idx
            else:
                print("skipping current frame adaptation, since the target seems to be lost", file=log.v4)
                do_step = False
        elif idx % self.adaptation_interval == 1:
            print("idx % self.adaptation_interval = 1")
            if frame_idx >= 1:
                print("-----------")
                key_list = self.extractBorderFrame(video_idx, kframe_lst[-1], frame_idx - 1, threshold_)
                key = key_list[-1]
                if key not in kframe_lst:
                    kframe_lst.append(key)
            feed_dict = self.train_data.feed_dict_for_video_frame(kframe_lst[-1], with_annotations=True)
            loss_scale = self.adaptation_loss_scale
            adaption_frame_idx = kframe_lst[-1]
        elif idx % self.adaptation_interval == 2:
            print("idx % self.adaptation_interval = 2")
            if len(kframe_lst) > 2:
                feed_dict = self.train_data.feed_dict_for_video_frame(kframe_lst[-2], with_annotations=True)
                adaption_frame_idx = kframe_lst[-2]
                if kframe_lst[-2] - kframe_lst[-1] > 5:
                    loss_scale = self.adaptation_loss_scale * 100
                else:
                    loss_scale = self.adaptation_loss_scale
        elif idx % self.adaptation_interval == 3:
            print("idx % self.adaptation_interval = 3")
            if len(kframe_lst) > 3:
                feed_dict = self.train_data.feed_dict_for_video_frame(kframe_lst[-3], with_annotations=True)
                adaption_frame_idx = kframe_lst[-3]
                if kframe_lst[-3] - kframe_lst[-1] > 10:
                    loss_scale = self.adaptation_loss_scale * 100
                else:
                    loss_scale = self.adaptation_loss_scale
        else:
            print("else")
            # mix in first frame to avoid drift
            # (do this even if we think the target is lost, since then this can help to find back the target)
            feed_dict = self.train_data.feed_dict_for_video_frame(0, with_annotations=True)
            loss_scale = 1.0
            adaption_frame_idx = 0
        if do_step:
            loss, measures, n_imgs = self.trainer.train_step(
                epoch=idx, feed_dict=feed_dict, loss_scale=loss_scale,
                learning_rate=self.adaptation_learning_rate)
            iou = Measures.calc_iou(measures, n_imgs, [0])
            assert n_imgs == 1
            print("adapting on frame", adaption_frame_idx, "of sequence", video_idx + 1,
                  self.train_data.video_tag(video_idx), "loss:", loss, "iou:", iou, file=log.v4)
    if do_adaptation:
        return negatives, kframe_lst
    else:
        return None, kframe_lst
def __maha__(self, x=None, only_refresh_image=False):
    image = np.zeros((350, 480, 3), dtype=np.uint8)

    if self.model_index > 0 and self.patches.contains_mahalanobis_distances:
        font = cv2.FONT_HERSHEY_SIMPLEX
        fontScale = 0.5
        thickness = 1

        model = sorted(self.patches.mahalanobis_distances.dtype.names)[self.model_index - 1]

        cv2.putText(image, "Model:", (10, 20), font, fontScale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
        cv2.putText(image, model, (65, 20), font, fontScale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
        cv2.putText(image, "Filter:", (10, 50), font, fontScale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)

        if not only_refresh_image:
            self.patches.mahalanobis_distances_filtered[:] = self.patches.mahalanobis_distances[model]

        sigma_0 = (cv2.getTrackbarPos("0_gaussian_0", self.WINDOWS_MAHA),
                   cv2.getTrackbarPos("0_gaussian_1", self.WINDOWS_MAHA),
                   cv2.getTrackbarPos("0_gaussian_2", self.WINDOWS_MAHA))
        if sigma_0 != (0, 0, 0):
            cv2.putText(image, "gaussian (%i, %i, %i)" % sigma_0, (65, 50), font, fontScale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
            if not only_refresh_image:
                self.patches.mahalanobis_distances_filtered = utils.gaussian_filter(self.patches.mahalanobis_distances_filtered, sigma=sigma_0)

        erosion_dilation = cv2.getTrackbarPos("1_erosion_dilation", self.WINDOWS_MAHA)
        if erosion_dilation > 0:
            struct = generate_binary_structure(
                cv2.getTrackbarPos("1_erosion_dilation_structure_rank", self.WINDOWS_MAHA),
                cv2.getTrackbarPos("1_erosion_dilation_structure_connectivity", self.WINDOWS_MAHA))
            if struct.ndim == 2:
                z = np.zeros_like(struct, dtype=np.bool)
                struct = np.stack((z, struct, z))
            if erosion_dilation == 1:
                cv2.putText(image, "erosion", (65, 80), font, fontScale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
                if not only_refresh_image:
                    self.patches.mahalanobis_distances_filtered = grey_erosion(self.patches.mahalanobis_distances_filtered, structure=struct)
            elif erosion_dilation == 2:
                cv2.putText(image, "dilation", (65, 80), font, fontScale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
                if not only_refresh_image:
                    self.patches.mahalanobis_distances_filtered = grey_dilation(self.patches.mahalanobis_distances_filtered, structure=struct)
            for (z, x, y) in np.ndindex(struct.shape):
                cv2.putText(image, str(int(struct[z, x, y])), (150 + y * 15 + z * 60, 80 + x * 15), font, fontScale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)

        sigma_2 = (cv2.getTrackbarPos("2_gaussian_0", self.WINDOWS_MAHA),
                   cv2.getTrackbarPos("2_gaussian_1", self.WINDOWS_MAHA),
                   cv2.getTrackbarPos("2_gaussian_2", self.WINDOWS_MAHA))
        if sigma_2 != (0, 0, 0):
            cv2.putText(image, "gaussian (%i, %i, %i)" % sigma_2, (65, 140), font, fontScale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
            if not only_refresh_image:
                self.patches.mahalanobis_distances_filtered = utils.gaussian_filter(self.patches.mahalanobis_distances_filtered, sigma=sigma_2)

        # Add some statistics
        threshold = float(cv2.getTrackbarPos("threshold", self.WINDOWS_MAHA)) / 10000.0
        cv2.putText(image, " TPR FPR Threshold", (10, 190), font, fontScale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)

        self._metrics_ax1.clear()
        # self._metrics_ax2.clear()
        self._metrics_ax1.set_yscale("log")

        for i, (metric_name, metric) in enumerate(sorted(PatchArray.METRICS.items())):
            labels = metric.get_labels(self.patches)
            scores = metric.get_values(self.patches.mahalanobis_distances_filtered)

            if metric.current_threshold == -1:
                m = np.max(scores)
                metric.current_threshold = m
            else:
                m = metric.current_threshold

            thresh = m * threshold

            negatives = scores[labels == 1]
            positives = scores[labels == 2]

            # scores of no-anomaly samples above the threshold count as false positives
            false_positives = np.count_nonzero(negatives >= thresh)
            true_positives = np.count_nonzero(positives >= thresh)

            tpr = true_positives / float(positives.size) * 100.0 if float(positives.size) > 0 else 0
            fpr = false_positives / float(negatives.size) * 100.0 if float(negatives.size) > 0 else 0

            if metric_name != "per patch" and i == cv2.getTrackbarPos("metric", self.WINDOWS_MAHA):
                self._labels = labels
                self._scores = scores
                self._thresh = thresh

                for r in np.reshape(np.diff(np.r_[0, labels == 0, 0]).nonzero()[0], (-1, 2)):
                    self._metrics_ax1.axvspan(r[0], r[1], facecolor='black', alpha=0.1)
                for r in np.reshape(np.diff(np.r_[0, np.logical_and(labels == 2, scores >= thresh), 0]).nonzero()[0], (-1, 2)):
                    self._metrics_ax1.axvspan(r[0], r[1], facecolor='g', alpha=0.2)
                for r in np.reshape(np.diff(np.r_[0, np.logical_and(labels == 1, scores >= thresh), 0]).nonzero()[0], (-1, 2)):
                    self._metrics_ax1.axvspan(r[0], r[1], facecolor='r', alpha=0.2)
                for r in np.reshape(np.diff(np.r_[0, np.logical_and(labels == 2, scores < thresh), 0]).nonzero()[0], (-1, 2)):
                    self._metrics_ax1.axvspan(r[0], r[1], facecolor='b', alpha=0.2)

                self._metrics_ax1.set_ylim(0, np.max(scores))
                self._metrics_ax1.plot(scores, lw=1, label=metric_name, color="black")
                # self._metrics_ax1.axvline(x=self.index, linewidth=0.5, color="black")
                self._metrics_ax1.axhline(y=thresh, linewidth=0.5, color="black")
                self._metrics_fig.suptitle(metric_name)

                if not only_refresh_image:
                    self._histogram_ax1.clear()
                    self._histogram_ax2.clear()
                    # r = (np.nanmin(self.patches.mahalanobis_distances_filtered), np.nanmax(self.patches.mahalanobis_distances_filtered))
                    self._histogram_ax1.set_title("No anomaly")
                    self._histogram_ax2.set_title("Anomaly")
                    self._histogram_fig.suptitle("Mahalanobis distances")
                    _, bins, _ = self._histogram_ax1.hist(negatives.ravel(), bins=200)
                    self._histogram_ax2.hist(positives.ravel(), bins=bins)
                    self._histogram_fig.canvas.draw()

            cv2.putText(image, metric_name, (40, 220 + i * 30), font, fontScale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
            cv2.putText(image, "%.2f" % tpr, (200, 220 + i * 30), font, fontScale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
            cv2.putText(image, "%.2f" % fpr, (300, 220 + i * 30), font, fontScale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
            cv2.putText(image, "%.2f" % thresh, (400, 220 + i * 30), font, fontScale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)

        self._metrics_fig.canvas.draw()

    self.__draw__()
    cv2.imshow(self.WINDOWS_MAHA, image)