Пример #1
0
def main():
    """Convert the train and val splits into one stored dataset.

    Command-line arguments (all positional, all optional):
        dataset_path: path to the coco2017 dataset
            (default "./data/COCOdataset2017").
        output_path: where the combined dataset is stored
            (default "COCOdataset2017").
        year: default "2017"; not read here, only forwarded to
            ``load_dataset`` as part of ``args``.

    Each split is loaded with ``load_dataset(args, tag)``, its length is
    printed, the splits are concatenated in the order train, val, and the
    result is stored at ``output_path``. The elapsed wall-clock time in
    minutes is logged at the end.
    """
    t1 = time.time()

    # nargs="?" is what makes a positional argument optional; without it
    # argparse requires the value and the declared defaults are never used.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "dataset_path",
        metavar="P",
        type=str,
        nargs="?",
        help="Path to coco2017 dataset",
        default="./data/COCOdataset2017",
    )
    parser.add_argument(
        "output_path",
        metavar="N",
        type=str,
        nargs="?",
        help="Dataset output path",
        default="COCOdataset2017",
    )
    parser.add_argument("year", metavar="Y", type=str, nargs="?", default="2017")
    # Parse before initializing hub so that --help and usage errors exit
    # without starting any workers.
    args = parser.parse_args()

    hub.init(processes=True, n_workers=psutil.cpu_count(), memory_limit=55e9)

    tags = ["train", "val"]
    splits = {tag: load_dataset(args, tag) for tag in tags}
    for tag, split in splits.items():
        print(f"{tag}: {len(split)} samples")

    ds = dataset.concat([splits[tag] for tag in tags])
    ds.store(args.output_path)

    t2 = time.time()
    logger.info(f"Pipeline took {(t2 - t1) / 60} minutes")
Пример #2
0
def test_dataset_concat():
    """Check that concat of two datasets keeps all samples in input order."""
    head_values = np.array([5, 6, 7], dtype="int32")
    tail_values = np.array([1, 2, 3], dtype="int32")

    # Both datasets expose their single tensor under the same key, "t1".
    tensors = [tensor.from_array(values) for values in (head_values, tail_values)]
    parts = [dataset.from_tensors({"t1": item}) for item in tensors]

    merged = dataset.concat(parts)

    expected = np.array([5, 6, 7, 1, 2, 3], dtype="int32")
    assert len(merged) == 6
    assert (merged["t1"].compute() == expected).all()