def test_insert_before_extension(self, mock_dt):
    """
    insert_before_extension() should insert the given string before the
    file extension (or append it when there is no extension), and should
    fall back to a datetime.now()-derived timestamp when no string is given.
    """
    # (input filename, expected output) pairs covering Unix/Windows paths,
    # with and without an extension
    cases = [
        ('/dir/subdir/file.jpg', '/dir/subdir/file.newstring.jpg'),
        (r'c:\dir\file.jpg', r'c:\dir\file.newstring.jpg'),
        ('/dir/subdir/file', '/dir/subdir/file.newstring'),
        (r'c:\dir\file', r'c:\dir\file.newstring'),
    ]
    for filename, expected in cases:
        result = insert_before_extension(filename=filename, s='newstring')
        self.assertEqual(result, expected)

    # When no string is supplied, a timestamp from datetime.now() is used
    mock_dt.now = mock.Mock(return_value=datetime(2020, 7, 15, 10, 13, 46))
    timestamp = '2020.07.15.10.13.46'
    result = insert_before_extension(filename='/dir/subdir/file.jpg')
    self.assertEqual(result, f'/dir/subdir/file.{timestamp}.jpg')
def divide_files_into_tasks(file_list_json, n_files_per_task=default_n_files_per_api_task):
    """
    Divides the file *file_list_json*, which should contain a single
    json-encoded list of strings, into a set of json files, each containing
    *n_files_per_task* files (the last file will contain <=
    *n_files_per_task* files).

    If the input .json is blah.json, output files will be blah.chunk000.json,
    blah.chunk001.json, etc. (chunk numbering starts at 000).

    Args:
        file_list_json: path to a .json file containing a list of filenames
        n_files_per_task: max number of filenames to write to each output file

    Returns:
        output_files: list of output .json filenames
        chunks: list of lists of filenames; chunks[i] is the content of
            output_files[i]
    """
    with open(file_list_json) as f:
        file_list = json.load(f)

    chunks = divide_chunks(file_list, n_files_per_task)

    output_files = []
    for i_chunk, chunk in enumerate(chunks):
        chunk_id = 'chunk{:0>3d}'.format(i_chunk)
        output_file = path_utils.insert_before_extension(file_list_json, chunk_id)
        output_files.append(output_file)
        # Stream directly to the file rather than building an intermediate
        # string with json.dumps()
        with open(output_file, 'w') as f:
            json.dump(chunk, f, indent=1)

    return output_files, chunks
def divide_list_into_tasks(
        file_list: Sequence[str],
        save_path: str,
        n_files_per_task: int = MAX_FILES_PER_API_TASK
        ) -> Tuple[List[str], List[Sequence[Any]]]:
    """
    Splits *file_list* into chunks of at most *n_files_per_task* filenames
    and writes each chunk to its own JSON file.

    Output filenames are derived from *save_path* by inserting a chunk id
    before the extension: `blah.json` yields `blah.chunk000.json`,
    `blah.chunk001.json`, etc.

    Args:
        file_list: list of str, filenames to split across multiple JSON files
        save_path: str, base path to save the chunked lists
        n_files_per_task: int, max number of files to include in each API task

    Returns:
        output_files: list of str, output JSON file names
        chunks: list of list of str, chunks[i] is the content of output_files[i]
    """
    chunks = divide_chunks(file_list, n_files_per_task)

    output_files = []
    for chunk_index, chunk in enumerate(chunks):
        output_file = path_utils.insert_before_extension(
            save_path, f'chunk{chunk_index:03d}')
        with open(output_file, 'w') as f:
            json.dump(chunk, f, indent=1)
        output_files.append(output_file)

    return output_files, chunks
# ---- Repeat-detection elimination (RDE) configuration ----
# NOTE(review): 'options' is presumably a repeat-detection options object
# created earlier in this script; 'image_base' and 'filename_base' are also
# defined upstream — confirm before running this cell in isolation.
options.bRenderHtml = False
options.imageBase = image_base

# Encode the RDE thresholds into the output folder/file names,
# e.g. 'rde_0.60_0.85_10_0.2'
rde_string = 'rde_{:.2f}_{:.2f}_{}_{:.1f}'.format(
    options.confidenceMin, options.iouThreshold,
    options.occurrenceThreshold, options.maxSuspiciousDetectionSize)
options.outputBase = os.path.join(filename_base, rde_string)
options.filenameReplacements = {'': ''}

# -1 disables the various debug limits (i.e., process everything)
options.debugMaxDir = -1
options.debugMaxRenderDir = -1
options.debugMaxRenderDetection = -1
options.debugMaxRenderInstance = -1

# Run repeat-detection finding on the first combined API output file
# NOTE(review): assumes folder_name_to_combined_output_file has exactly one
# entry of interest — only the first value is used.
api_output_filename = list(folder_name_to_combined_output_file.values())[0]
filtered_output_filename = path_utils.insert_before_extension(
    api_output_filename, 'filtered_{}'.format(rde_string))

suspiciousDetectionResults = repeat_detections_core.find_repeat_detections(
    api_output_filename, None, options)


#%% Manual RDE step

# Manually review the rendered suspicious detections before re-filtering
## DELETE THE ANIMALS ##


#%% Re-filtering

from api.batch_processing.postprocessing import remove_repeat_detections

# NOTE(review): this call is truncated in the visible chunk; remaining
# arguments continue past the end of this excerpt.
remove_repeat_detections.remove_repeat_detections(
    inputFile=api_output_filename,
    outputFile=filtered_output_filename,
# ---- Split images/annotations into train and test sets ----
# Annotations whose category name is 'test' go to the test split; everything
# else goes to train.
# NOTE(review): assumes 'train_images', 'test_images', 'annotations', 'data',
# and the id->record lookup dicts were initialized earlier in this script.
# Images are appended once per annotation, so an image with multiple
# annotations appears multiple times — presumably handled (or acceptable)
# downstream; confirm.
train_annotations = []
test_annotations = []

for ann in tqdm(annotations):
    category_id = ann['category_id']
    image_id = ann['image_id']
    category_name = category_id_to_category[category_id]['name']
    im = image_id_to_image[image_id]
    if category_name == 'test':
        test_images.append(im)
        test_annotations.append(ann)
    else:
        train_images.append(im)
        train_annotations.append(ann)

train_fn = insert_before_extension(output_json_filename, 'train')
test_fn = insert_before_extension(output_json_filename, 'test')

# Use context managers rather than json.dump(data, open(...)), which leaked
# the file handles (they were never closed)
data['images'] = train_images
data['annotations'] = train_annotations
with open(train_fn, 'w') as f:
    json.dump(data, f, indent=2)

data['images'] = test_images
data['annotations'] = test_annotations
with open(test_fn, 'w') as f:
    json.dump(data, f, indent=2)


#%% Validate .json files

options = sanity_check_json_db.SanityCheckOptions()
options.baseDir = input_base_dir
options.bCheckImageSizes = False
# ---- Repeat-detection elimination (RDE) configuration ----
# NOTE(review): the output folder name 'rde_0.6_0.85_10_0.2' hard-codes the
# threshold values assigned just below (confidenceMin_iouThreshold_
# occurrenceThreshold_maxSuspiciousDetectionSize); keep them in sync if the
# thresholds change.
options.outputBase = os.path.join(filename_base,'rde_0.6_0.85_10_0.2')
options.filenameReplacements = {'':''}

# Detection-filtering thresholds
options.confidenceMin = 0.6
options.confidenceMax = 1.01
options.iouThreshold = 0.85
options.occurrenceThreshold = 10
options.maxSuspiciousDetectionSize = 0.2

# -1 disables the various debug limits (i.e., process everything)
options.debugMaxDir = -1
options.debugMaxRenderDir = -1
options.debugMaxRenderDetection = -1
options.debugMaxRenderInstance = -1

# Run repeat-detection finding on the first combined API output file
# NOTE(review): assumes folder_name_to_combined_output_file has exactly one
# entry of interest — only the first value is used.
api_output_filename = list(folder_name_to_combined_output_file.values())[0]
filtered_output_filename = path_utils.insert_before_extension(api_output_filename,'filtered')

suspiciousDetectionResults = repeat_detections_core.find_repeat_detections(api_output_filename, None, options)


#%% Manual RDE step

# Manually review the rendered suspicious detections before re-filtering
## DELETE THE ANIMALS ##


#%% Re-filtering

from api.batch_processing.postprocessing import remove_repeat_detections