def finfindr_feature_extract_aid_batch(ibs, aid_list, jobs=None, **kwargs):
    """Extract finFindR features for a batch of annotations in parallel.

    Ensures up to ``MAXJOBS`` clones of the finFindR backend container,
    distributes the annotation chip filepaths across the clone URLs in
    round-robin order, and runs the extraction helper over ``jobs``
    worker processes.

    Args:
        ibs: controller object providing ``docker_ensure`` and
            ``get_annot_chip_fpath``.
        aid_list (list): annotation rowids to process.
        jobs (int, optional): worker count; defaults to the CPU count,
            capped by ``len(aid_list)`` and ``MAXJOBS``.

    Returns:
        list: one JSON result per annotation, in input order.
    """
    MAXJOBS = 4
    if jobs is None:
        jobs = ut.num_cpus()
    jobs = min(jobs, len(aid_list), MAXJOBS)

    # Bring up (or reuse) one backend container clone per worker.
    url_clone_list = []
    for clone_index in range(jobs):
        container_name = 'flukebook_finfindr'
        urls_clone = ibs.docker_ensure(container_name, clone=clone_index)
        if not urls_clone:
            raise RuntimeError('Could not ensure container clone')
        url_clone = urls_clone[0]
        if len(urls_clone) > 1:
            # Multiple URLs reported; fall back to the first one.
            args = (
                urls_clone,
                url_clone,
            )
            logger.info(
                '[WARNING] Multiple BACKEND_URLS:\n\tFound: %r\n\tUsing: %r' % args
            )
        url_clone_list.append(url_clone)

    config = {
        'ext': '.jpg',
    }
    fpath_list = ibs.get_annot_chip_fpath(aid_list, ensure=True, config2_=config)

    # Pair each chip with a backend URL, cycling through the clones.
    url_list = [
        url_clone_list[position % len(url_clone_list)]
        for position in range(len(fpath_list))
    ]

    args_list = list(zip(url_list, fpath_list))
    json_result_gen = ut.generate2(
        finfindr_feature_extract_aid_helper,
        args_list,
        nTasks=len(args_list),
        nprocs=jobs,
        ordered=True,
    )
    return list(json_result_gen)
def resize_imagelist_generator(gpath_list, new_gpath_list, newsize_list, **kwargs):
    """Resize images and yield results asynchronously.

    Args:
        gpath_list (list): source image filepaths.
        new_gpath_list (list): destination filepaths for the resized images.
        newsize_list (list): target sizes, one per image.
        **kwargs: forwarded to ``ut.generate2``.

    Returns:
        generator yielding one result per image.
    """
    # Default to serial, ordered execution unless the caller overrides.
    kwargs.setdefault('force_serial', True)
    kwargs.setdefault('ordered', True)
    arg_list = list(zip(gpath_list, new_gpath_list, newsize_list))
    return ut.generate2(_resize_worker, arg_list, **kwargs)
def classify(vector_list, weight_filepath, verbose=VERBOSE_SVM, **kwargs):
    """Classify feature vectors with one or more pretrained SVM models.

    Fixes over the previous revision: the docstring documented a
    nonexistent ``thumbail_list`` parameter; a dead ``OLD = False`` code
    path could never execute and has been removed; an empty
    ``vector_list`` raised ``ZeroDivisionError`` when computing batch
    sizes and now yields nothing; the inner loop no longer shadows the
    ``weight_filepath`` parameter.

    Args:
        vector_list (list): feature vectors to classify.
        weight_filepath (str): path to an SVM model file, a directory of
            model files (treated as an ensemble), or a shorthand key in
            ``CONFIG_URL_DICT`` resolving to a downloadable model.
        verbose (bool): verbosity flag, kept for API consistency.

    Yields:
        tuple: ``(score_, class_)`` per input vector, where ``score_`` is
        the averaged score across models and ``class_`` is ``'positive'``
        or ``'negative'`` by majority vote across the ensemble.
    """
    import multiprocessing
    import numpy as np

    # Resolve shorthand model names to local filepaths, downloading if needed.
    if weight_filepath in CONFIG_URL_DICT:
        weight_url = CONFIG_URL_DICT[weight_filepath]
        if weight_url.endswith('.zip'):
            weight_filepath = ut.grab_zipped_url(weight_url, appname='ibeis')
        else:
            weight_filepath = ut.grab_file_url(
                weight_url, appname='ibeis', check_hash=True
            )

    # A directory of model files is treated as an ensemble; a single file
    # is a one-model "ensemble".
    if isdir(weight_filepath):
        weight_filepath_list = sorted(
            join(weight_filepath, filename)
            for filename in listdir(weight_filepath)
            if isfile(join(weight_filepath, filename))
        )
    else:
        weight_filepath_list = [weight_filepath]
    num_weights = len(weight_filepath_list)
    assert num_weights > 0

    num_vectors = len(vector_list)
    if num_vectors == 0:
        # Nothing to classify; previously this raised ZeroDivisionError
        # when computing vector_rounds below.
        return
    index_list = list(range(num_vectors))

    # Partition the vectors into roughly one batch per CPU and pair every
    # batch with every model so inference parallelizes across both.
    num_cpus = multiprocessing.cpu_count()
    vector_batch = int(np.ceil(float(num_vectors) / num_cpus))
    vector_rounds = int(np.ceil(float(num_vectors) / vector_batch))
    args_list = []
    for vector_round in range(vector_rounds):
        start_index = vector_round * vector_batch
        stop_index = min((vector_round + 1) * vector_batch, num_vectors)
        assert start_index < num_vectors
        # Slice indices and the matching feature data for this batch.
        index_list_ = list(range(start_index, stop_index))
        vector_list_ = vector_list[start_index:stop_index]
        assert len(index_list_) == len(vector_list_)
        for weight_filepath_ in weight_filepath_list:
            args_list.append((weight_filepath_, vector_list_, index_list_))
    nTasks = len(args_list)
    print('Processing vectors in parallel using vector_batch = %r' % (vector_batch, ))

    # Perform inference
    classify_iter = ut.generate2(classify_helper, args_list, nTasks=nTasks,
                                 ordered=True, force_serial=False)

    # Accumulate per-index scores and class votes across all model results.
    score_dict = {index: [] for index in index_list}
    class_dict = {index: [] for index in index_list}
    for score_dict_, class_dict_ in classify_iter:
        for index in index_list:
            if index in score_dict_:
                score_dict[index] += score_dict_[index]
            if index in class_dict_:
                class_dict[index] += class_dict_[index]

    # Average score and majority-vote class per input vector.
    for index in index_list:
        score_list_ = score_dict[index]
        class_list_ = class_dict[index]
        score_ = sum(score_list_) / len(score_list_)
        class_ = max(set(class_list_), key=class_list_.count)
        class_ = 'positive' if int(class_) == 1 else 'negative'
        yield score_, class_