def mandlebrot_test(fapth):
    """
    Render an animated Mandelbrot "cloud mask" timelapse.

    Generates 150 progressively padded masks of the Mandelbrot set, writes
    each as a numbered TIFF frame into *fapth*, then stitches the frames
    into a video with FFmpeg.

    Parameters
    ----------
    fapth : str
        Directory prefix where the TIFF frames and the output video are
        written. Must include a trailing separator (it is concatenated
        directly with the frame filenames).
    """
    mask = compute_mandelbrot(100, 50., 601, 401).astype(np.uint8)

    # Initialise the CloudMasker program
    cloud_masker = CloudMasker(mask)

    # Make Mandelbrot frames, one per padding level.
    with gu.progress("", 150) as progress:
        for pad in range(150):
            mask_padded = cloud_masker.pad_mask(masked_value=1,
                                                padding=pad,
                                                merge_masks=False)
            cloud_masker._mask = mask_padded
            cloud_masker.output_mask("{fpath}mandlebrot_{pad}.tiff".format(
                fpath=fapth, pad=str(pad).rjust(3, "0")))
            progress.update()

    # Make video of the mandlebrot. The frames were written under `fapth`,
    # so the FFmpeg input pattern and output file must carry the same
    # prefix (previously both pointed at the current working directory,
    # which broke whenever fapth != CWD).
    ffmpeg_cmd = ('ffmpeg -r 25 -start_number 000 '
                  '-i {fpath}mandlebrot_%03d.tiff '
                  '-vf "pad=ceil(iw/2)*2:ceil(ih/2)*2" -preset slow '
                  '-profile:v high -level:v 4.0 -pix_fmt yuv420p -crf 22 '
                  '-codec:a aac -b:v 50000k -minrate 50000k -maxrate 50000k '
                  '-an {fpath}mandlebrot_timelapse.mov'.format(fpath=fapth))
    logger.info(f"Creating mandlebrot video using FFMPEG cmd: {ffmpeg_cmd}")
    run_command(ffmpeg_cmd)
def _get_creation(self, file_list, file_type):
    """
    Finds the creation time for image and video files.

    Parameters
    ----------
    file_list : list
        The image or video file paths to inspect.
    file_type : str
        Specify the file types in file_list. Either 'images' or 'videos'.

    Returns
    -------
    creation_date : list of str
        One timestamp per file, formatted '%Y:%m:%d %H:%M:%S' (the EXIF
        DateTimeOriginal format).

    Raises
    ------
    ValueError
        If file_type is neither 'images' nor 'videos'.

    Reference
    ---------
    https://stackoverflow.com/a/23064792
    """
    if file_type not in ('images', 'videos'):
        raise ValueError("file_type must be 'images' or 'videos', "
                         "got {!r}".format(file_type))

    def _mtime_date(file):
        # Fallback timestamp from the filesystem. Note this is the
        # modification time; it only equals creation time on Windows.
        return datetime.fromtimestamp(
            os.path.getmtime(file)).strftime('%Y:%m:%d %H:%M:%S')

    label = ('Photos checked: %s/%s' if file_type == 'images'
             else 'Videos checked: %s/%s')
    creation_date = []
    with progress(label, size=len(file_list)) as prog:
        for i, file in enumerate(file_list):
            # Update the progress bar.
            prog.update(i, len(file_list))
            if file_type == 'images':
                try:
                    # EXIF tag 36867 is DateTimeOriginal.
                    creation_date.append(Image.open(file)._getexif()[36867])
                except (KeyError, TypeError, AttributeError):
                    # No (usable) EXIF data - fall back to file times.
                    creation_date.append(_mtime_date(file))
            else:
                # Videos carry no EXIF; use file times directly.
                creation_date.append(_mtime_date(file))
    return creation_date
def _remove_duplicates(self):
    """
    Checks for duplicate files using a hashing algorithm (SHA256).

    All but the first file of every duplicate set is deleted from disk,
    separately for images and videos, and self.image_files /
    self.video_files are then refreshed from the base directory.
    """
    # Hash all files.
    total_len = len(self.image_files) + len(self.video_files)
    n_hashed = 0
    hash_files = {'Image': [], 'Video': []}
    with progress('Files hashed: %s/%s', size=total_len) as prog:
        for files, file_supname in zip(
                [self.image_files, self.video_files], ['Image', 'Video']):
            for file in files:
                # Update the progress bar.
                prog.update(n_hashed, total_len)
                hash_files[file_supname].append(self._sha256sum(file))
                n_hashed += 1
            # Convert hash_files into numpy arrays for complex indexing.
            hash_files[file_supname] = np.array(hash_files[file_supname],
                                                dtype=str)

    print("[INFO] Removing all but one duplicate images...")

    # Check for duplicates within each file type.
    for files, file_supname in zip(
            [self.image_files, self.video_files], ['Image', 'Video']):
        # Convert the file list into a numpy array for complex indexing.
        files = np.array(files, dtype=str)

        # Work out the non-unique hashes.
        unique, counts = np.unique(hash_files[file_supname],
                                   return_counts=True)
        not_unique = unique[counts > 1]
        print("[INFO] Number of duplicate {type} found are: {num}".format(
            type=file_supname.lower() + 's', num=not_unique.size))

        # Loop over each duplicate hash and find the files that match that
        # hash value and remove all but one of the files.
        for digest in not_unique:
            for file in files[hash_files[file_supname] == digest][1:]:
                print("[INFO] Removing file: {file}".format(file=file))
                os.remove(file)

    # Now need to recheck the files.
    self.image_files, self.video_files = self._findfiles(
        basedir=self.basedir, images=True, videos=True)
def rename(self, remove_duplicates=False):
    """
    Method used to rename all files within basedir.

    Follows the filename format,
    image_<sub_directory>_imagenum.<original_file_format>

    :param remove_duplicates : bool, optional
        Remove the duplicate images and videos. This is done by hashing
        all the images and videos (SHA256) and finding clashes.
    """
    print("[INFO] Renaming your photos and videos.")

    # Check for duplicate files if requested.
    if remove_duplicates:
        print("[INFO] Checking for duplicate image and video files")
        self._remove_duplicates()

    print("[INFO] Getting the creation dates of all your photos...")
    # Get creation dates for all image files.
    image_creation_date = self._get_creation(self.image_files,
                                             file_type='images')
    # Sort image_files by date (mergesort keeps ties in a stable order).
    mask = np.argsort(image_creation_date, kind='mergesort')
    self.image_files = self.image_files[mask]

    print("[INFO] Getting the creation dates of all your videos...")
    # Get creation dates for all video files.
    video_creation_date = self._get_creation(self.video_files,
                                             file_type='videos')
    # Sort video_files by date.
    mask = np.argsort(video_creation_date, kind='mergesort')
    self.video_files = self.video_files[mask]

    print("[INFO] Creating temporary names for your photos and videos. "
          "This avoids errors if you run this program multiple times.")
    # First pass: rename everything to throw-away unique names so the
    # final pass can never collide with a file that already holds a
    # target name (e.g. from a previous run).
    total_len = len(self.image_files) + len(self.video_files)
    n_done = 0
    with progress('Files renamed: %s/%s', size=total_len) as prog:
        for file_type, file_supname in zip(
                [self.image_files, self.video_files], ['Image', 'Video']):
            temp_names = []
            for i, file in enumerate(file_type):
                # Update the progress bar.
                prog.update(n_done, total_len)
                # Create new filename.
                temp_names.append(
                    file.parents[0] / (self._generate_temp_name() +
                                       file.suffix))
                # Rename file.
                os.rename(file, temp_names[-1])
                n_done += 1
            if file_supname == 'Image':
                image_files = temp_names
            else:
                video_files = temp_names

    print("[INFO] Now, let's rename all photos and videos into a nice "
          "organised format...")
    # Second pass: rename to the final organised names.
    total_len = len(image_files) + len(video_files)
    n_done = 0
    with progress('Files renamed: %s/%s', size=total_len) as prog:
        for file_type, file_supname in zip([image_files, video_files],
                                           ['Image', 'Video']):
            for i, file in enumerate(file_type):
                # Update the progress bar.
                prog.update(n_done, total_len)
                # Get directory name the file exists within.
                sup_dirname = file.parents[0].name
                # Create new filename.
                file_new = file.parents[0] / (
                    file_supname + '_' + sup_dirname.replace(' ', '_') +
                    '_' + str(i).rjust(4, '0') + file.suffix)
                # Rename file.
                os.rename(file, file_new)
                n_done += 1
def stress_test(num_sizes=None, num_clouds=None, num_iters=None,
                save_data=None, load_data=False, save_plot=None):
    """
    Stress test CloudMasker.pad_mask over grid size and cloud coverage.

    Times pad_mask across a log-spaced grid of box sizes and cloud
    percentages, then plots the median timing as a log-log colour mesh.

    Parameters
    ----------
    num_sizes : int, optional
        Number of box sizes to test (default 5).
    num_clouds : int, optional
        Number of cloud-coverage values to test (default 5).
    num_iters : int, optional
        Timing repetitions per (size, coverage) pair (default 200).
    save_data : str, optional
        Path to save the raw timings as a compressed .npz. If None the
        timings are not saved.
    load_data : str or False, optional
        Path of a previously saved .npz; skips the timing run entirely.
    save_plot : str, optional
        Path to save the figure. If None the figure is shown interactively.
    """
    if load_data is False:
        # Lets stress test this sucker.
        np.random.seed(0)
        num_sizes = 5 if num_sizes is None else num_sizes
        num_clouds = 5 if num_clouds is None else num_clouds
        num_iters = 200 if num_iters is None else num_iters
        total = num_sizes * num_clouds * num_iters
        box_size = np.geomspace(0.5 * 10**4, 1, num=num_sizes, dtype=int)
        box_entropy = np.geomspace(10**-5, 100, num=num_clouds,
                                   dtype=np.float64)
        results = np.zeros((num_sizes, num_clouds, num_iters),
                           dtype=np.float64)
        with gu.progress(
                "Box Size: %s, Box Entropy: %s,"
                "Iteration: %s. Time: %s", total) as progress:
            for i, size in enumerate(box_size):
                for j, mask_num in enumerate(box_entropy):
                    for k in range(num_iters):
                        # Work out the number of possible values you want
                        # to randomise in your grid. The idea here is that
                        # the mask number will dictate the percentage of
                        # the grid that is a cloud i.e. 1 to 100%.
                        entropy = int(
                            max(1, size**2 - (size**2 * (mask_num / 100))))

                        # Create the grid with a "percentage" being cloud.
                        mask = np.random.randint(
                            entropy,
                            size=(int(size), int(size))).astype(np.uint8)

                        # Initialise the cloud masker class.
                        cloud_masker = CloudMasker(mask)

                        # Pad the mask by 1 value and time it.
                        t1 = time.time()
                        mask_padded = cloud_masker.pad_mask(
                            masked_value=0, padding=1, merge_masks=False)
                        t2 = time.time()
                        results[i, j, k] = t2 - t1

                        # Update the progress bar.
                        progress.update(size, mask_num, k, t2 - t1)

        # Save for safe keeping. Previously this called savez_compressed
        # unconditionally and crashed with the default save_data=None
        # after the whole run had completed.
        if save_data is not None:
            np.savez_compressed(save_data, box_size=box_size,
                                box_entropy=box_entropy,
                                box_results=results)

    # Load the data.
    else:
        data = np.load(load_data)
        box_size = data['box_size']
        box_entropy = data['box_entropy']
        results = data['box_results']

    # Collapse the iteration axis to the median timing for each box size
    # and entropy (median is robust to timing outliers).
    results = np.nanmedian(results, axis=2)

    # Work out the min and max values for plotting.
    vmin = max(10**-5, np.nanmin(results))
    vmax = gu.truncate(np.nanmax(results), floor=False)

    print(results)
    print(f"Results size: {results.shape}")
    print(f"Min: {vmin}. Max: {vmax}")

    # Plot the data as a colour mesh on log-log axes.
    plt.pcolormesh(box_size**2, box_entropy, np.array(results).T,
                   norm=LogNorm(vmin=vmin, vmax=vmax),
                   cmap=plt.get_cmap('tab20b'))
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel("Size of the grid box (dimensionless)")
    plt.ylabel("Cloud percentage (%)")
    plt.ylim(10**-5, 100)

    # Fix the aspect ratio.
    ax = plt.gca()
    f = plt.gcf()
    gu.fixed_aspect_ratio(ax=ax, ratio=1, adjustable=None,
                          xscale='log', yscale='log')
    cbar = plt.colorbar()
    cbar.set_label('Time taken (s)')

    # Set size of the figure.
    f.set_size_inches(5, 4)
    plt.tight_layout()
    if save_plot is None:
        plt.show()
    else:
        plt.savefig(save_plot, bbox_inches='tight', pad_inches=0.1,
                    dpi=300)