def main(argv):
    """Split images from ``args.input_dir`` into train/test output folders.

    Each file is copied to ``<output_dir>/train/<basename>`` or
    ``<output_dir>/test/<basename>``.  Files already present in a split are
    skipped; a file present in BOTH splits is treated as corrupt state and
    removed from the test split.  ``args.n_test`` is the desired total
    test-set size (existing test files count toward it).
    """
    args = parse_arguments(argv)  # fix: was parse_argments (NameError)
    # NOTE(review): split('/')[-2] assumes input_dir carries a trailing
    # slash ('a/b/' -> 'b') -- confirm callers always pass one.
    basename = args.input_dir.split('/')[-2]
    train_dir = os.path.join(args.output_dir, 'train', basename)
    test_dir = os.path.join(args.output_dir, 'test', basename)
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    names = set(get_filenames(args.input_dir))
    test_names = set(get_filenames(test_dir))
    train_names = set(get_filenames(train_dir))

    # A file in both splits indicates corrupt state: drop it from test.
    invalid_names = test_names & train_names
    names -= test_names
    names -= train_names
    for name in invalid_names:
        os.remove(os.path.join(test_dir, name))
        logger.info('"%s" is duplicated... Remove', name)

    # Top up the test split to args.n_test files, never sampling a
    # negative count when the test split is already large enough.
    n_test = max(0, args.n_test - len(test_names))
    # fix: random.sample() on a set raises TypeError on Python >= 3.11;
    # sorting first also makes the sampled population order deterministic.
    # A set makes the per-file membership test below O(1).
    test_samples = set(random.sample(sorted(names), n_test))

    for name in names:
        src_path = os.path.join(args.input_dir, name)
        if name in test_samples:
            dst_path = os.path.join(test_dir, name)
        else:
            dst_path = os.path.join(train_dir, name)
        shutil.copyfile(src_path, dst_path)
def main(argv):
    """Interactively classify images into valid/invalid folders.

    Shows each not-yet-classified image from ``args.input_dir`` and
    dispatches on the pressed key: accept (copy to ``valid/``), reject
    (copy to ``invalid/``), exclude (record in a JSON cache file),
    undo the previous decision, or quit.  Progress survives restarts
    because already-classified names are filtered out up front.
    """
    args = parse_arguments(argv)
    dirname = os.path.basename(args.input_dir)
    valid_dir = os.path.join(args.output_dir, dirname, 'valid')
    invalid_dir = os.path.join(args.output_dir, dirname, 'invalid')
    os.makedirs(valid_dir, exist_ok=True)
    os.makedirs(invalid_dir, exist_ok=True)
    removed_json = os.path.join(args.output_dir, dirname, '.cache.json')

    names = get_filenames(args.input_dir)
    valid_names = get_filenames(valid_dir)
    invalid_names = get_filenames(invalid_dir)
    removed_names = read_json(removed_json)
    removed_names = [] if removed_names is None else removed_names
    # Only present images that were not classified in a previous run.
    names = sorted(
        set(names) - set(valid_names) - set(invalid_names) -
        set(removed_names))

    # Instruction
    sys.stdout.write('Key input instructions:\n'
                     'j: Accept current image\n'
                     'k: Reject current image\n'
                     'u: Undo recent validation\n'
                     'd: Exclude image \n'
                     'q: Quit validation\n')

    i = 0
    while i < len(names):
        path = os.path.join(args.input_dir, names[i])
        key = show_image(path, args.size)
        if key == KeyStatus.UNDO and i > 0:
            # fix: was ``i > 1``, which made the very first decision
            # (index 0) impossible to undo.
            i -= 1
            if os.path.exists(os.path.join(valid_dir, names[i])):
                os.remove(os.path.join(valid_dir, names[i]))
            elif os.path.exists(os.path.join(invalid_dir, names[i])):
                os.remove(os.path.join(invalid_dir, names[i]))
            else:
                # Not copied anywhere: the undone action was REMOVE.
                removed_names.pop()
                # fix: keep the on-disk cache in sync with the undo;
                # previously the popped name stayed in .cache.json until
                # the next REMOVE happened to rewrite it.
                write_json(removed_json, removed_names)
        elif key == KeyStatus.OK:
            shutil.copyfile(path, os.path.join(valid_dir, names[i]))
            i += 1
        elif key == KeyStatus.FAIL:
            shutil.copyfile(path, os.path.join(invalid_dir, names[i]))
            i += 1
        elif key == KeyStatus.REMOVE:
            removed_names.append(names[i])
            write_json(removed_json, removed_names)
            i += 1
        else:
            # Any other key (e.g. 'q') ends the session.
            sys.exit()
def import_csv(data_folder):
    """Rebuild the database from the CSV dataset found in *data_folder*.

    Locates the csv/spec/cfg files, wipes the existing tables, loads the
    data, then dumps the database and the explained variable list.
    """
    csv, spec, cfg = get_filenames(data_folder)
    wipe_db_tables()
    to_database(csv, spec, cfg)
    db_dump()
    dump_var_list_explained()
def main(argv):
    """Run anime-face extraction over every image in ``args.input_dir``.

    Extracted results are written under ``<output_dir>/<input dirname>``.
    """
    args = parse_arguments(argv)
    target_dir = os.path.join(args.output_dir,
                              os.path.basename(args.input_dir))
    extractor = AnimeFaceExtractor(args.detector_path, target_dir,
                                   image_size=args.img_size)
    for filename in get_filenames(args.input_dir):
        extractor(os.path.join(args.input_dir, filename))
def batch_match_to_pub(filename, result_filename):
    """Match a batch of arXiv metadata files against publisher records.

    :param filename: path/pattern handed to ``get_filenames`` to collect
        the arXiv metadata files to process.
    :param result_filename: output path; when truthy, one match line per
        input file is written there (questionable hits are appended via
        ``write_for_inspection_hits``), otherwise each match is emitted
        through ``single_match_output``.
    :return: None
    """
    filenames = get_filenames(filename)
    if not filenames:
        return
    if result_filename:
        # output file
        with open(result_filename, 'w') as fp:
            # one file at a time, parse and score, and then write the
            # result to the file
            for arXiv_filename in filenames:
                a_match, for_inspection = single_match_to_pub(arXiv_filename)
                fp.write('%s\n' % a_match)
                if for_inspection:
                    write_for_inspection_hits(result_filename, for_inspection)
    else:
        for arXiv_filename in filenames:
            single_match_output(arXiv_filename)
def get_results(labels_filename, xd_preconv_filename, target_variables,
                set_type="Test"):
    """Gather actual and predicted values for a list of target_variables.

    This function includes all the models which are capable of predicting
    on those target_variables.  Returns a list of ``Series_y`` objects,
    alternating actual series and model-prediction series per variable.
    """
    # Color triples look like (B, G, R) given the BLUE/RED labels --
    # presumably OpenCV ordering; confirm against the plotting code.
    ACTUAL_COL = (255, 255, 255)  # WHITE
    MODEL1_COL = (255, 0, 0)  # BLUE
    MODEL2_COL = (0, 0, 255)  # RED (currently unused here)

    # Pre-convolved image features, pickled once to avoid recomputation.
    with open(xd_preconv_filename, "rb") as f:
        xd_preconv = pickle.load(f)

    results = []
    for target_variable in target_variables:
        filenames = get_filenames(labels_filename, target_variable,
                                  set_type=set_type)
        if target_variable == "queue_end_pos":
            print(f"len filenames: {len(filenames)}")
            # Actual
            r = Series_y("actual.queue_end_pos", True, ACTUAL_COL,
                         target_variable)
            r.set_results(
                get_y_data(LABELS_FILENAME, filenames, target_variable))
            results.append(r)

            # Model prediction: reuse one name for both the label and
            # the weights file so they cannot drift apart.
            TO_BE_EVALUATED = "CNN_EoQ_VGG16_noaug_weighted_20210514.h5"
            r = Series_y(TO_BE_EVALUATED, False, MODEL1_COL, target_variable)
            r.set_results(
                VGG16_predict(TO_BE_EVALUATED, xd_preconv, filenames,
                              target_variable))
            results.append(r)
        elif target_variable == "queue_full":
            r = Series_y("actual.queue_full", True, ACTUAL_COL,
                         target_variable)
            r.set_results(
                get_y_data(LABELS_FILENAME, filenames, target_variable))
            results.append(r)

            model_name = "CNN_Queue_full_20210408.h5"
            r = Series_y(model_name, False, MODEL1_COL, target_variable)
            r.set_results(
                VGG16_predict(model_name, xd_preconv, filenames,
                              target_variable))
            results.append(r)
        elif target_variable == "lanes":
            r = Series_y("actual.lanes", True, ACTUAL_COL, target_variable)
            y = get_y_data(LABELS_FILENAME, filenames, target_variable)
            y = convert_float_lanes_to_boolean(y, input_is_12=True)
            r.set_results(y)
            results.append(r)

            model_name = "CNN_Lanes_VGG16_weighted_20210408.h5"
            r = Series_y(model_name, False, MODEL1_COL, target_variable)
            r.set_results(
                VGG16_predict(model_name, xd_preconv, filenames,
                              target_variable))
            results.append(r)

    # Clear tf memory usage -
    # https://www.tensorflow.org/api_docs/python/tf/keras/backend/clear_session
    clear_session()
    return results
import itertools  # fix: takewhile/dropwhile used below but never imported

import pandas as pd

# import tabulate
from common import docstring_to_file, get_filenames
# NOTE: this function is a direct query to all unique labels
from database import get_unique_labels
from load_spec import load_spec

FILLER = "<...>"

# ----------------------------------------------------------------------------
data_folder = "../data/ind09/"
csv, spec, cfg = get_filenames(data_folder)
default_dicts = load_spec(spec)


# ----------------------------------------------------------------------------
def get_var_abbr(name):
    """Return the leading run of upper-case words, e.g. 'PROD_E_TWh' -> 'PROD_E'."""
    words = name.split('_')
    return '_'.join(itertools.takewhile(lambda word: word.isupper(), words))

assert get_var_abbr('PROD_E_TWh') == 'PROD_E'


def get_unit_abbr(name):
    """Return everything after the upper-case prefix, e.g. 'PROD_E_TWh' -> 'TWh'."""
    words = name.split('_')
    return '_'.join(itertools.dropwhile(lambda word: word.isupper(), words))

assert get_unit_abbr('PROD_E_TWh') == 'TWh'