Exemplo n.º 1
0
def collect_experiment(key, root_uri, output_dir, get_pred_package=False):
    """Copy the outputs of a single experiment into output_dir/key.

    Looks under <root_uri>/<key> for exactly one predict package and one
    eval.json (via S3 listing or a local glob), downloads the eval.json —
    and optionally the predict package — and pretty-prints the 'overall'
    section of the eval.

    Args:
        key: experiment key (subdirectory name under root_uri).
        root_uri: root of the experiment outputs ('s3://...' or local path).
        output_dir: local directory to collect results into.
        get_pred_package: if True, also download the predict package.
    """
    print('\nCollecting experiment {}...\n'.format(key))

    if root_uri.startswith('s3://'):
        # S3 has no real directories, so list by prefix + extension filter.
        pred_uris = list_paths(
            join(root_uri, key, 'bundle'), ext='predict_package.zip')
        eval_uris = list_paths(join(root_uri, key, 'eval'), ext='eval.json')
    else:
        pred_uris = glob.glob(
            join(root_uri, key, 'bundle', '*', 'predict_package.zip'))
        eval_uris = glob.glob(join(root_uri, key, 'eval', '*', 'eval.json'))

    # Refuse ambiguous keys: more than one experiment under the same key.
    if max(len(pred_uris), len(eval_uris)) > 1:
        print('Cannot collect from key with multiple experiments!!!')
        return

    # Both outputs must exist before collecting anything.
    if min(len(pred_uris), len(eval_uris)) == 0:
        print('Missing output!!!')
        return

    dest_dir = join(output_dir, key)
    make_dir(dest_dir)
    if get_pred_package:
        download_or_copy(pred_uris[0], dest_dir)

    download_or_copy(eval_uris[0], dest_dir)

    eval_json = file_to_json(join(dest_dir, 'eval.json'))
    pprint.pprint(eval_json['overall'], indent=4)
    def test_list_paths_s3(self):
        """Listing an S3 "directory" prefix returns the one uploaded file.

        Writes a local file, uploads it to s3://<bucket>/xxx/lorem.txt, and
        asserts that list_paths on the s3://<bucket>/xxx/ prefix yields
        exactly one path.
        """
        path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        s3_path = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
        s3_directory = 's3://{}/xxx/'.format(self.bucket_name)
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)

        # Fix: list_paths was previously called twice with the same argument,
        # the first result being discarded — one call is sufficient.
        self.assertEqual(len(list_paths(s3_directory)), 1)
Exemplo n.º 3
0
    def unzip_data(self, uri: Union[str, List[str]]) -> List[str]:
        """Unzip dataset zip files.

        Args:
            uri: a list of URIs of zip files or the URI of a directory containing
                zip files

        Returns:
            paths to directories that each contain contents of one zip file
        """
        # Normalize the argument into a list of zip-file URIs.
        if isinstance(uri, list):
            zip_uris = uri
        elif uri.endswith('.zip'):
            zip_uris = [uri]
        else:
            zip_uris = list_paths(uri, 'zip')

        extract_dirs = []
        for ind, zip_uri in enumerate(zip_uris):
            # Reuse a cached copy when present; otherwise download it.
            local_zip = get_local_path(zip_uri, self.data_cache_dir)
            if not isfile(local_zip):
                local_zip = download_if_needed(zip_uri, self.data_cache_dir)
            # A uuid component keeps extraction dirs unique across calls.
            target_dir = join(self.tmp_dir, 'data', str(uuid.uuid4()),
                              str(ind))
            with zipfile.ZipFile(local_zip, 'r') as archive:
                archive.extractall(target_dir)
            extract_dirs.append(target_dir)

        return extract_dirs
Exemplo n.º 4
0
def collect_eval_dir(root_uri):
    """Print a short summary for every eval.json under <root_uri>/eval.

    For each eval found, prints the name of its containing directory and
    the 'f1' score of the last entry of its 'overall' list.
    """
    for eval_uri in list_paths(join(root_uri, 'eval'), ext='eval.json'):
        summary = file_to_json(eval_uri)
        print(basename(dirname(eval_uri)))
        print(summary['overall'][-1]['f1'])
        print()
Exemplo n.º 5
0
 def get_scene_ids(self):
     """Return the numeric scene ids parsed from label GeoJSON filenames.

     Lists '.geojson' files under <raw_uri>/<base_dir>/<label_dir> and
     extracts the digits that follow self.label_fn_prefix in each name.
     """
     label_dir = os.path.join(self.raw_uri, self.base_dir, self.label_dir)
     pattern = re.compile(r'.*{}(\d+)\.geojson'.format(self.label_fn_prefix))
     ids = []
     for label_path in list_paths(label_dir, ext='.geojson'):
         # NOTE(review): assumes every listed file matches the pattern;
         # a non-matching name would raise AttributeError here.
         ids.append(pattern.match(label_path).group(1))
     return ids
    def test_sync_from_dir_noop_local(self):
        """Syncing a local dir onto itself with delete=True must be a no-op."""
        src = os.path.join(self.tmp_dir.name, 'lorem')
        file_path = os.path.join(src, 'ipsum.txt')
        make_dir(src, check_empty=False)

        file_system = FileSystem.get_file_system(src, 'r')
        file_system.write_bytes(file_path, bytes([0x00, 0x01]))
        sync_from_dir(src, src, delete=True)

        # The single file written above must survive the self-sync.
        self.assertEqual(len(list_paths(src)), 1)
    def test_copy_to_local(self):
        """upload_or_copy between two local paths behaves as a file copy."""
        src_path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        dst_path = os.path.join(self.tmp_dir.name, 'yyy', 'ipsum.txt')
        src_dir = os.path.dirname(src_path)
        dst_dir = os.path.dirname(dst_path)
        for d in (src_dir, dst_dir):
            make_dir(d, check_empty=False)

        str_to_file(self.lorem, src_path)

        upload_or_copy(src_path, dst_path)
        # Exactly one file should now exist in the destination directory.
        self.assertEqual(len(list_paths(dst_dir)), 1)
    def test_sync_to_dir_local(self):
        """sync_to_dir mirrors a local source directory into a local dest."""
        file_path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
        src_dir = os.path.dirname(file_path)
        dst_dir = os.path.join(self.tmp_dir.name, 'xxx')
        make_dir(src_dir, check_empty=False)
        make_dir(dst_dir, check_empty=False)

        file_system = FileSystem.get_file_system(file_path, 'r')
        file_system.write_bytes(file_path, bytes([0x00, 0x01]))
        sync_to_dir(src_dir, dst_dir, delete=True)

        # The one source file must have been mirrored into dst.
        self.assertEqual(len(list_paths(dst_dir)), 1)
Exemplo n.º 9
0
# Jupyter-notebook cell: enable inline matplotlib rendering.
get_ipython().run_line_magic('matplotlib', 'inline')

# %%
# Build (scene_id, image_uri, label_uri) listings for the SpaceNet v2
# building dataset, one AOI at a time.
aois = ['Vegas', 'Paris', 'Shanghai', 'Khartoum']
# Numeric AOI index used in the SpaceNet S3 key layout; parallel to `aois`.
aoi_inds = [2, 3, 4, 5]
feature_type = 'buildings'
# NOTE(review): out_path and dfs are defined but not used in this cell —
# presumably consumed by a later cell that writes the CSV; confirm.
out_path = '/opt/data/research/ssl/spacenet.csv'
dfs = []

for aoi, aoi_ind in zip(aois, aoi_inds):
    # S3 prefixes/filename prefixes for this AOI's labels and PS-RGB imagery.
    label_dir = f's3://spacenet-dataset/spacenet/SN2_{feature_type}/train/AOI_{aoi_ind}_{aoi}/geojson_{feature_type}/'
    image_dir = f's3://spacenet-dataset/spacenet/SN2_{feature_type}/train/AOI_{aoi_ind}_{aoi}/PS-RGB/'
    image_fn_prefix = f'SN2_{feature_type}_train_AOI_{aoi_ind}_{aoi}_PS-RGB_img'
    label_fn_prefix = f'SN2_{feature_type}_train_AOI_{aoi_ind}_{aoi}_geojson_{feature_type}_img'

    # Scene ids are the digits between the label filename prefix and '.geojson'.
    label_paths = list_paths(label_dir, ext='.geojson')
    label_re = re.compile(r'.*{}(\d+)\.geojson'.format(label_fn_prefix))
    scene_ids = [label_re.match(label_path).group(1) for label_path in label_paths]

    aoi_info = []
    for scene_id in scene_ids:
        # Skip a specific scene — presumably a known-bad sample; verify.
        if aoi == 'Vegas' and scene_id == '1000':
            continue

        image_uri = join(image_dir, f'{image_fn_prefix}{scene_id}.tif')
        label_uri = join(label_dir, f'{label_fn_prefix}{scene_id}.geojson')
        scene_id = f'{aoi}_{scene_id}'
        aoi_info.append((scene_id.lower(), image_uri, label_uri))

    # Fixed seed so the shuffle (and any downstream split) is reproducible.
    random.seed(1234)
    random.shuffle(aoi_info)
Exemplo n.º 10
0
def get_run_dirs(base_uri):
    """Return the run directories found under base_uri.

    A run directory is recognized by containing train/test_metrics.json two
    levels below it; the returned paths are those grandparent directories.
    """
    metrics_uris = list_paths(base_uri, ext='train/test_metrics.json')
    run_dirs = []
    for metrics_uri in metrics_uris:
        run_dirs.append(dirname(dirname(metrics_uri)))
    return run_dirs