def make_pipeline_config_file(cfg):
    """Create the training pipeline config from the pretrained model's template.

    Walks ``cfg["training_dir"]`` looking for a ``.config`` file (the template
    shipped with the pretrained model), rewrites the dataset-specific fields
    (num_classes, fine-tune checkpoint, label map path, train/val record
    paths), and writes the result to ``cfg["pipeline_config_filepath"]``.

    Inputs:
    -------
    cfg (dict): requires keys "training_dir", "id", "label_map",
        "train_record", "test_record", "pipeline_config_filepath".

    Raises:
    -------
    FileNotFoundError: when no ``.config`` file exists under training_dir.
    """
    root_dir = cfg["training_dir"]
    ext = ".config"
    config_filepath = ""
    # Full walk: when several .config files exist, the last one found wins
    # (matches the original behavior).
    for path, _subdirs, files in os.walk(root_dir):
        for filename in files:
            if os.path.splitext(filename)[1] == ext:
                config_filepath = os.path.join(path, filename)
    # BUG FIX: the original fell through to open("") with a confusing
    # FileNotFoundError when no template was found; fail loudly with context.
    if not config_filepath:
        raise FileNotFoundError(
            f"No '{ext}' template found under {os.path.abspath(root_dir)}"
        )
    # The fine-tune checkpoint lives next to the template inside the
    # extracted pretrained-model folder.
    model_ckpt = os.path.join(os.path.dirname(config_filepath), "model.ckpt")
    file_contents = []
    with open(config_filepath, "r") as fid:
        for line in fid:
            if "num_classes" in line:
                line = f'\tnum_classes: {len(cfg["id"])}\n'
            if "model.ckpt" in line:
                line = f'\tfine_tune_checkpoint: "{model_ckpt}"\n'
            if "label_map.pbtxt" in line:
                line = f'\tlabel_map_path: "{cfg["label_map"]}"\n'
            if "train.record" in line:
                line = f'\t\tinput_path: "{cfg["train_record"]}"\n'
            if "val.record" in line:
                line = f'\t\tinput_path: "{cfg["test_record"]}"\n'
            file_contents.append(line)
    with open(cfg["pipeline_config_filepath"], "w") as fid:
        fid.writelines(file_contents)
    logging.info(f"{cfg['pipeline_config_filepath']} has been created")
def make_tfrecord(cfg):
    """Write the train and test TFRecord files from their CSV annotation files.

    Inputs:
    -------
    cfg (dict): requires keys "image_dir", "train_record", "test_record",
        "train_csvpath", "test_csvpath", "label_map".
    """
    image_dir = cfg["image_dir"]
    record_csv_pairs = zip(
        [cfg["train_record"], cfg["test_record"]],
        [cfg["train_csvpath"], cfg["test_csvpath"]],
    )
    for record_path, csv_path in record_csv_pairs:
        writer = tf.python_io.TFRecordWriter(record_path)
        images_root = os.path.join(image_dir)
        annotations = pd.read_csv(csv_path)
        # One TF example per image: rows are grouped by filename first.
        for group in split(annotations, "filename"):
            example = create_tf_example(cfg["label_map"], group, images_root)
            writer.write(example.SerializeToString())
        writer.close()
        logging.info(f"Successfully created the TFRecords: {record_path}")
def makedir(inputDir, remove=False):
    """Summary:
    --------
    Make directory (including intermediate directories), optionally wiping an
    existing one first.

    Inputs:
    -------
    inputDir (str): fullpath to directory to be created
    remove (bool): option to remove current existing folder
    """
    # Treat any truthy `remove` as a request to wipe (the original compared
    # `remove is True`, which silently ignored truthy non-bool values).
    if remove and os.path.exists(inputDir):
        logging.warning("Remove existing folder")
        shutil.rmtree(inputDir)
    if not os.path.exists(inputDir):
        logging.info("Making directory: {}".format(os.path.abspath(inputDir)))
        os.makedirs(inputDir)
    else:
        # BUG FIX: log message typo "exist" -> "exists".
        logging.info(
            "mkdir: Directory already exists: {}".format(os.path.abspath(inputDir))
        )
def make_traintest_csv(cfg):
    """Randomly split XML annotations into train/test label CSV files.

    Rows are grouped by image filename so all boxes of one image land in the
    same split. The train fraction is cfg["train_ratio"]; outputs go to
    cfg["train_csvpath"] / cfg["test_csvpath"].
    """
    df = xml_to_df(cfg["anno_dir"])
    train_ratio = cfg["train_ratio"]
    output_dir = cfg["dataset_dir"]
    train_csvpath = cfg["train_csvpath"]
    test_csvpath = cfg["test_csvpath"]
    # Keep every row of a given image together so no file straddles the split.
    grouped = df.groupby("filename")
    per_file_frames = [grouped.get_group(key) for key in grouped.groups]
    total = len(per_file_frames)
    train_index = np.random.choice(
        total, size=int(train_ratio * total), replace=False
    )
    test_index = np.setdiff1d(list(range(total)), train_index)
    df_train = pd.concat([per_file_frames[i] for i in train_index])
    df_test = pd.concat([per_file_frames[i] for i in test_index])
    # NOTE: checked after the split on purpose, mirroring the original flow
    # (the RNG draw above is consumed either way).
    if output_dir is None:
        logging.error("output_dir is not defined")
        return
    utils.makedir(output_dir)
    logging.info(f"Making {output_dir}/train_labels.csv")
    df_train.to_csv(train_csvpath, index=None)
    logging.info(f"Making {output_dir}/test_labels.csv")
    df_test.to_csv(test_csvpath, index=None)
def download_url(url, to_file, **kwargs):
    """Stream-download `url` to `to_file` with a tqdm progress bar.

    Skips the download entirely when `to_file` already exists; creates the
    parent directory if needed.

    Inputs:
    -------
    url (str): source URL
    to_file (str): destination filepath
    **kwargs: accepted for call-site compatibility (currently unused)

    Raises:
    -------
    requests.HTTPError: when the server responds with a 4xx/5xx status.
    """
    if os.path.exists(to_file):
        logging.info("File exists: {}. Skip downloading".format(to_file))
        return
    logging.info("Downloading to: {}".format(to_file))
    makedir(os.path.dirname(to_file))
    # Close the streamed connection deterministically via the context manager.
    with requests.get(url, stream=True) as r:
        # BUG FIX: fail on HTTP errors instead of silently writing the error
        # body (e.g. a 404 page) to `to_file`.
        r.raise_for_status()
        # Total size in bytes (0 when the server sends no content-length).
        total_size = int(r.headers.get("content-length", 0))
        block_size = 1024  # 1 Kibibyte
        t = tqdm(total=total_size, unit="iB", unit_scale=True)
        with open(to_file, "wb") as fid:
            for data in r.iter_content(block_size):
                t.update(len(data))
                fid.write(data)
        t.close()
    logging.info("\n")
def download_pretrained_model(cfg):
    """Download and extract the requested pretrained model from the model zoo.

    Scrapes cfg["model_zoo_url"] for tarball links, picks the one matching
    cfg["pretrained_model"], downloads it to cfg["pretrained_filepath"],
    extracts it into cfg["training_dir"], then deletes the tarball.
    """
    url = cfg["model_zoo_url"]
    pretrained_model = cfg["pretrained_model"]
    page = requests.get(url)
    webpage = html.fromstring(page.content)
    url_list = webpage.xpath("//a/@href")
    # BUG FIX: the original filter used
    # `"download.tensorflow.org" and "tar.gz" in x`, where the first operand
    # is a truthy constant, so only the "tar.gz" test ever ran. Check both
    # substrings in the candidate URL.
    url_list = [
        x for x in url_list if "download.tensorflow.org" in x and "tar.gz" in x
    ]
    pretrained_url = None
    # Don't shadow the outer `url` with the loop variable.
    for candidate in url_list:
        if pretrained_model in candidate:
            pretrained_url = candidate
            break
    if pretrained_url is None:
        # BUG FIX: log the model name; `pretrained_url` is always None here,
        # so the original printed "None is not available for downloading".
        logging.info(f"{pretrained_model} is not available for downloading")
        return
    utils.download_url(pretrained_url, cfg["pretrained_filepath"])
    model_tarfile = os.path.basename(cfg["pretrained_filepath"])
    logging.info(f"Extracting {model_tarfile}")
    with tarfile.open(cfg["pretrained_filepath"]) as fid:
        # NOTE(review): extractall on a downloaded archive is path-traversal
        # prone; consider tarfile's extraction filter (`filter="data"`,
        # Py3.12+) once the runtime allows — TODO confirm target Python.
        fid.extractall(cfg["training_dir"])
    logging.info(f"Removing {model_tarfile}")
    os.remove(cfg["pretrained_filepath"])
def inner(*args, **kwargs):
    """Wrapper that logs START/DONE around a call to the wrapped `method`.

    `method` is a free variable captured from the enclosing decorator scope
    (not visible in this chunk). All arguments and the return value are
    passed through untouched.
    """
    name = method.__name__
    logging.info(f"START: {name}.")
    output = method(*args, **kwargs)
    logging.info(f"DONE: {name}")
    return output