Example #1
def main(config):

    board = expanduser(config["undistort"]["board"])
    framerate = int(config["undistort"]["framerate"])
    m_corners = int(config["undistort"]["m_corners"])
    n_corners = int(config["undistort"]["n_corners"])
    target = expanduser(config["undistort"]["target"])
    do_debug = config["undistort"]["do_debug"]
    do_crop = config["undistort"]["do_crop"]

    cam_calib_results = calibrate_checkerboard(board,
                                               m_corners,
                                               n_corners,
                                               framerate=framerate,
                                               do_debug=do_debug)

    cam_mtx, dist = cam_calib_results["cam_mtx"], cam_calib_results["dist"]

    if Path(target).is_file():
        undistort(target, cam_mtx, dist, framerate, do_crop=do_crop)

    elif Path(target).is_dir():

        vids = [str(path.absolute()) for path in Path(target).rglob("*.mp4")]
        vids = [
            vid for vid in vids
            if "checkerboards" not in vid and "undistorted" not in vid
        ]

        if len(vids) == 0:
            raise ValueError("No '.mp4' videos were found.")

        for vid in vids:
            undistort(vid, cam_mtx, dist, framerate, do_crop=do_crop)
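
A hedged invocation sketch for main() above; the config keys mirror those read in the function body, while the paths and values are hypothetical placeholders.
config = {
    "undistort": {
        "board": "~/videos/checkerboard.mp4",   # hypothetical checkerboard video
        "framerate": 30,
        "m_corners": 9,
        "n_corners": 6,
        "target": "~/videos/experiment",        # hypothetical .mp4 file or folder of .mp4s
        "do_debug": False,
        "do_crop": True,
    }
}
main(config)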
Example #2
File: lib.py  Project: wonkodv/hanstool
def load_scripts(path):
    if not isinstance(path, pathlib.Path):
        path = str(path)
        path = os.path.expanduser(path)
        path = pathlib.Path(path)
    if path.is_dir():
        scripts = path.glob('*.py')
        # sort b.50.py before a.80.py
        scripts = sorted(scripts, key=lambda p: [p.suffixes[-2][1:] if len(p.suffixes) > 1 else "", p])
        for p in scripts:
            load_scripts(p)
    elif path.is_file():
        with path.open("rt") as f:
            c = f.read()
        c = compile(c, str(path), "exec")
        try:
            env.Env['__FILE__'] = str(path.absolute())
            exec(c, env.Env.dict)
            del env.Env['__FILE__']
        except NotImplementedError:
            # Script wanted to be ignored
            pass
        SCRIPTS.append(path)
    else:
        raise Exception("neither file nor dir in load_Scripts", path)
Example #3
def validate_build_directory(parser: argparse.ArgumentParser, arg: str) -> str:
    path: pathlib.Path = pathlib.Path(arg)
    if path.exists():
        return str(path.absolute().as_posix())
    else:
        # ArgumentTypeError is what argparse expects a type= callable to raise
        raise argparse.ArgumentTypeError(
            "FATAL: path {} does not exist".format(arg))
Example #4
def addAllFiles(targetDir, sourceDir):
    # Get everything recursively.
    rootpath = Path(sourceDir)
    paths = list(rootpath.glob("**/*"))
    print(targetDir)
    print(sourceDir)
    for path in paths:
        rel = str(path.relative_to(sourceDir))
        if path.is_dir():
            ensureFolder(targetDir + slash + rel)
        else:
            ensureFolder((targetDir + slash + rel).rpartition(slash)[0])

            if not Path(targetDir + slash + rel).exists():
                # in dst, src order
                makeHardLink(targetDir + slash + rel, str(path.absolute()))
                addToLinkManifest(targetDir + slash + rel, str(path.absolute()))
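
A hedged usage sketch; it assumes slash, ensureFolder, makeHardLink and addToLinkManifest are module-level helpers defined elsewhere, and both directories are hypothetical.
addAllFiles("/mnt/backup/photos", "/home/user/photos")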
Example #5
    def save(self):
        csv_name = "{}.csv".format(self.name)
        path = Path.cwd() / 'data' / csv_name

        with open(path.absolute(), mode='w', newline='') as csv_file:  # newline='' keeps csv.writer from emitting blank rows on Windows
            playlist_csv = csv.writer(csv_file, delimiter=',')
            for v in self.objs:
                playlist_csv.writerow([v.artist, v.album, v.title])
Example #6
def load_dataframe(uri_to_file: str, encoding: str):
    path = Path(uri_to_file)
    try:
        return pd.read_csv(path.absolute(),
                           sep=";",
                           encoding=encoding,
                           quoting=csv.QUOTE_NONE)
    except pd.errors.EmptyDataError as err:
        print(err)
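
A short usage sketch; the file name and encoding are placeholders. Note that load_dataframe() returns None when the file is empty.
df = load_dataframe("data/input.csv", encoding="utf-8")
if df is not None:
    print(df.head())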
Example #7
def HandleFile(path):
    data = []
    print(f"path: {path.absolute()}")
    #with open(path.absolute(), mode="r", encoding="iso-8859-1") as inputFile:
    with open(path.absolute(), mode="r", encoding="utf-8") as inputFile:
        data = json.load(inputFile)

    for component in data:
        HandleComponent(component)
Example #8
def find_dandiset_and_subpath(path: Path) -> tuple[Dandiset, Path]:
    """
    Find the Dandiset rooted at ``path`` or one of its parents, and return the
    Dandiset along with ``path`` made relative to the Dandiset root
    """
    path = path.absolute()
    ds = Dandiset.find(path)
    if ds is None:
        raise ValueError(f"{path}: not a Dandiset")
    return (ds, path.relative_to(ds.path))
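
A hedged usage sketch; the path below is hypothetical and is assumed to lie inside a Dandiset checkout.
ds, relpath = find_dandiset_and_subpath(Path("sub-01/sub-01_ecephys.nwb"))
print(ds.path, relpath)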
Example #9
    def validate_mount_point(parser: argparse.ArgumentParser, arg: str) -> str:
        path: pathlib.Path = pathlib.Path(arg)
        abs_path_str: str = str(path.absolute().as_posix())

        if not path.exists():
            os.mkdir(abs_path_str)
            logging.info(
                "Creating {}, which does not exist.".format(abs_path_str))

        return abs_path_str
Example #10
def InitializeMunicipalities(path):
    data = {}
    with open(path.absolute(), mode="r", encoding="utf-8") as inputFile:
        data = json.load(inputFile)

    counties = data["countyList"]
    for county in counties:
        municipalities = county["municipalityList"]
        for municipality in municipalities:
            municipalityNumberByName[municipality["name"]] = municipality["municipalityNumber"]
Example #11
def get_contents(path):
    print(now_time() + "Loading " + path.name)
    with open(path.absolute()) as f:
        contents = f.readlines()

    # Remove trailing whitespace & replace tabs with spaces
    contents = [line.rstrip() for line in contents]
    contents = [line.replace('\t', '    ') for line in contents]

    # Return the blob
    return ('\n'.join(contents)).strip()
Example #12
    def __call__(self, ctx) -> str:
        from pathlib import Path

        path = Path(self._path)
        if not path.is_absolute():
            path = Path(ctx._pwd) / path
            path = path.absolute()

        if not path.exists():
            path.mkdir(parents=True, exist_ok=True)
        elif not path.is_dir():
            raise ConfigurationError("'%s' is not a directory" % self._path)
        return str(path)
Example #13
def get_data_path():
    '''Helper function to get data path within project.
    '''
    from pathlib import Path

    path = Path('.').resolve()
    path_string = path.absolute().as_posix()
    if 'src' in path_string:
        path = path.parent / 'data'
    elif 'data' in path_string:
        pass
    else:
        path = path / 'data'
    path_to_data = f'{path.absolute().as_posix()}/'
    return path_to_data
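
A short usage sketch. The helper returns a POSIX-style string with a trailing slash, so it can be concatenated directly with a file name (the name below is hypothetical).
data_dir = get_data_path()
raw_csv = data_dir + "raw.csv"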
Example #14
    def __init__(self, name: str, path: Path, count: int, force=True):
        """[summary]

        Arguments:
            path {Path} -- The file path of the token file
            count {int} -- Number of tokens (overrides previous definitions)
            force --   If the token has already been created, force to write the maximum
                       number of tokens
        """
        super().__init__()

        self.path = path
        self.path.mkdir(exist_ok=True, parents=True)

        self.cache: Dict[str, TokenFile] = {}

        self.infopath = path / "token.info"

        self.ipc_lock = fasteners.InterProcessLock(path / "token.lock")
        self.lock = threading.Lock()

        self.name = name

        # Set the new number of tokens
        with self.lock, self.ipc_lock:
            # Get the maximum number of tokens
            if force or not self.infopath.is_file():
                self.total = count
                self.infopath.write_text(str(count))

            self.timestamp = os.path.getmtime(self.path)
            self._update()

        # Watched path
        self.watchedpath = str(path.absolute())
        self.watcher = ipcom().fswatch(self, self.path, recursive=True)
        logger.info("Watching %s", self.watchedpath)
Example #15
def upload(
    paths,
    existing="refresh",
    validation="require",
    dandiset_path=None,
    dandi_instance="dandi",
    allow_any_path=False,
    upload_dandiset_metadata=False,
    devel_debug=False,
    jobs=None,
    jobs_per_file=None,
    sync=False,
):
    from .dandiapi import DandiAPIClient
    from .dandiset import APIDandiset, Dandiset
    from .support.digests import get_digest

    dandiset = Dandiset.find(dandiset_path)
    if not dandiset:
        raise RuntimeError(
            f"Found no {dandiset_metadata_file} anywhere.  "
            "Use 'dandi register', 'download', or 'organize' first"
        )

    instance = get_instance(dandi_instance)
    assert instance.api is not None
    api_url = instance.api

    client = DandiAPIClient(api_url)
    client.dandi_authenticate()

    dandiset = APIDandiset(dandiset.path)  # "cast" to a new API based dandiset

    ds_identifier = dandiset.identifier

    if not re.match(dandiset_identifier_regex, str(ds_identifier)):
        raise ValueError(
            f"Dandiset identifier {ds_identifier} does not follow expected "
            f"convention {dandiset_identifier_regex!r}.  Use "
            f"'dandi register' to get a legit identifier"
        )

    from .metadata import get_default_metadata, nwb2asset
    from .pynwb_utils import ignore_benign_pynwb_warnings
    from .support.pyout import naturalsize
    from .utils import find_dandi_files, find_files, path_is_subpath
    from .validate import validate_file

    ignore_benign_pynwb_warnings()  # so validate doesn't whine

    #
    # Treat paths
    #
    if not paths:
        paths = [dandiset.path]
    original_paths = paths

    # Expand and validate all paths -- they should reside within dandiset
    paths = find_files(".*", paths) if allow_any_path else find_dandi_files(paths)
    paths = list(map(Path, paths))
    npaths = len(paths)
    lgr.info(f"Found {npaths} files to consider")
    for path in paths:
        if not (
            allow_any_path
            or path.name == dandiset_metadata_file
            or path.name.endswith(".nwb")
        ):
            raise NotImplementedError(
                f"ATM only .nwb and dandiset.yaml should be in the paths to upload. Got {path}"
            )
        if not path_is_subpath(str(path.absolute()), dandiset.path):
            raise ValueError(f"{path} is not under {dandiset.path}")

    # We will keep a shared set of "being processed" paths so
    # we could limit the number of them until
    #   https://github.com/pyout/pyout/issues/87
    # properly addressed
    process_paths = set()
    from collections import defaultdict

    uploaded_paths = defaultdict(lambda: {"size": 0, "errors": []})

    def skip_file(msg):
        return {"status": "skipped", "message": str(msg)}

    # TODO: we might want to always yield a full record so that a missing
    # field does not cause pyout to halt
    def process_path(path, relpath):
        """

        Parameters
        ----------
        path: Path
          Non Pure (OS specific) Path
        relpath:
          For location on server.  Will be cast to PurePosixPath

        Yields
        ------
        dict
          Records for pyout
        """
        # Ensure consistent types
        path = Path(path)
        relpath = PurePosixPath(relpath)
        try:
            try:
                path_stat = path.stat()
                yield {"size": path_stat.st_size}
            except FileNotFoundError:
                yield skip_file("ERROR: File not found")
                return
            except Exception as exc:
                # without limiting [:50] it might cause some pyout indigestion
                yield skip_file("ERROR: %s" % str(exc)[:50])
                return

            #
            # Validate first, so we do not bother server at all if not kosher
            #
            # TODO: enable back validation of dandiset.yaml
            if path.name != dandiset_metadata_file and validation != "skip":
                yield {"status": "pre-validating"}
                validation_errors = validate_file(path)
                yield {"errors": len(validation_errors)}
                # TODO: split for dandi, pynwb errors
                if validation_errors:
                    if validation == "require":
                        yield skip_file("failed validation")
                        return
                else:
                    yield {"status": "validated"}
            else:
                # yielding empty causes pyout to get stuck or crash
                # https://github.com/pyout/pyout/issues/91
                # yield {"errors": '',}
                pass

            #
            # Special handling for dandiset.yaml
            # Yarik hates it but that is life for now. TODO
            #
            if path.name == dandiset_metadata_file:
                # TODO This is a temporary measure to avoid breaking web UI
                # dandiset metadata schema assumptions.  All edits should happen
                # online.
                if upload_dandiset_metadata:
                    yield {"status": "updating metadata"}
                    client.set_dandiset_metadata(
                        dandiset.identifier, metadata=dandiset.metadata
                    )
                    yield {"status": "updated metadata"}
                else:
                    yield skip_file("should be edited online")
                return

            #
            # Compute checksums
            #
            yield {"status": "digesting"}
            try:
                file_etag = get_digest(path, digest="dandi-etag")
            except Exception as exc:
                yield skip_file("failed to compute digest: %s" % str(exc))
                return

            extant = client.get_asset_bypath(ds_identifier, "draft", str(relpath))
            if extant is not None:
                # The endpoint used to search by paths doesn't include asset
                # metadata, so we need to make another API call:
                metadata = client.get_asset(ds_identifier, "draft", extant["asset_id"])
                local_mtime = ensure_datetime(path_stat.st_mtime)
                remote_mtime_str = metadata.get("blobDateModified")
                d = metadata.get("digest", {})
                if "dandi:dandi-etag" in d:
                    extant_etag = d["dandi:dandi-etag"]
                else:
                    # TODO: Should this error instead?
                    extant_etag = None
                if remote_mtime_str is not None:
                    remote_mtime = ensure_datetime(remote_mtime_str)
                    remote_file_status = (
                        "same"
                        if extant_etag == file_etag and remote_mtime == local_mtime
                        else (
                            "newer"
                            if remote_mtime > local_mtime
                            else ("older" if remote_mtime < local_mtime else "diff")
                        )
                    )
                else:
                    remote_mtime = None
                    remote_file_status = "no mtime"

                exists_msg = f"exists ({remote_file_status})"

                if existing == "error":
                    # as promised -- not gentle at all!
                    raise FileExistsError(exists_msg)
                if existing == "skip":
                    yield skip_file(exists_msg)
                    return
                # Logic below only for overwrite and reupload
                if existing == "overwrite":
                    if extant_etag == file_etag:
                        yield skip_file(exists_msg)
                        return
                elif existing == "refresh":
                    if extant_etag == file_etag:
                        yield skip_file("file exists")
                        return
                    elif remote_mtime is not None and remote_mtime >= local_mtime:
                        yield skip_file(exists_msg)
                        return
                elif existing == "force":
                    pass
                else:
                    raise ValueError(f"invalid value for 'existing': {existing!r}")

                yield {"message": f"{exists_msg} - reuploading"}

            #
            # Extract metadata - delayed since takes time, but is done before
            # actual upload, so we could skip if this fails
            #
            # Extract metadata before actual upload and skip if fails
            # TODO: allow non-nwb files to skip this step
            # ad-hoc for dandiset.yaml for now
            yield {"status": "extracting metadata"}
            try:
                asset_metadata = nwb2asset(
                    path, digest=file_etag, digest_type="dandi_etag"
                )
            except Exception as exc:
                lgr.exception("Failed to extract metadata from %s", path)
                if allow_any_path:
                    yield {"status": "failed to extract metadata"}
                    asset_metadata = get_default_metadata(
                        path, digest=file_etag, digest_type="dandi_etag"
                    )
                else:
                    yield skip_file("failed to extract metadata: %s" % str(exc))
                    return
            metadata = asset_metadata.json_dict()
            metadata["path"] = str(relpath)

            #
            # Upload file
            #
            yield {"status": "uploading"}
            validating = False
            for r in client.iter_upload(
                ds_identifier, "draft", metadata, str(path), jobs=jobs_per_file
            ):
                if r["status"] == "uploading":
                    uploaded_paths[str(path)]["size"] = r.pop("current")
                    yield r
                elif r["status"] == "post-validating":
                    # Only yield the first "post-validating" status
                    if not validating:
                        yield r
                        validating = True
                else:
                    yield r
            yield {"status": "done"}

        except Exception as exc:
            if devel_debug:
                raise
            # Custom formatting for some exceptions we know to extract
            # user-meaningful message
            message = str(exc)
            uploaded_paths[str(path)]["errors"].append(message)
            yield {"status": "ERROR", "message": message}
        finally:
            process_paths.remove(str(path))

    # We will again use pyout to provide a neat table summarizing our progress
    # with upload etc
    from .support import pyout as pyouts

    # for the upload speeds we need to provide a custom aggregate
    t0 = time.time()

    def upload_agg(*ignored):
        dt = time.time() - t0
        # to help avoiding dict length changes during upload
        # might be not a proper solution
        # see https://github.com/dandi/dandi-cli/issues/502 for more info
        uploaded_recs = list(uploaded_paths.values())
        total = sum(v["size"] for v in uploaded_recs)
        if not total:
            return ""
        speed = total / dt if dt else 0
        return "%s/s" % naturalsize(speed)

    pyout_style = pyouts.get_style(hide_if_missing=False)
    pyout_style["upload"]["aggregate"] = upload_agg

    rec_fields = ["path", "size", "errors", "upload", "status", "message"]
    out = pyouts.LogSafeTabular(style=pyout_style, columns=rec_fields, max_workers=jobs)

    with out, client.session():
        for path in paths:
            while len(process_paths) >= 10:
                lgr.log(2, "Sleep waiting for some paths to finish processing")
                time.sleep(0.5)

            rec = {"path": str(path)}
            process_paths.add(str(path))

            try:
                relpath = path.absolute().relative_to(dandiset.path)

                rec["path"] = str(relpath)
                if devel_debug:
                    # DEBUG: do serially
                    for v in process_path(path, relpath):
                        print(str(v), flush=True)
                else:
                    rec[tuple(rec_fields[1:])] = process_path(path, relpath)
            except ValueError as exc:
                if "does not start with" in str(exc):
                    # if top_path is not the top path for the path
                    # Provide more concise specific message without path details
                    rec.update(skip_file("must be a child of top path"))
                else:
                    rec.update(skip_file(exc))
            out(rec)

    if sync:
        relpaths = []
        for p in original_paths:
            rp = os.path.relpath(p, dandiset.path)
            relpaths.append("" if rp == "." else rp)
        path_prefix = reduce(os.path.commonprefix, relpaths)
        to_delete = []
        for asset in client.get_dandiset_assets(
            ds_identifier, "draft", path=path_prefix
        ):
            if (
                any(p == "" or path_is_subpath(asset["path"], p) for p in relpaths)
                and not Path(dandiset.path, asset["path"]).exists()
            ):
                to_delete.append(asset["asset_id"])
        if to_delete and click.confirm(
            f"Delete {pluralize(len(to_delete), 'asset')} on server?"
        ):
            for asset_id in to_delete:
                client.delete_asset(ds_identifier, "draft", asset_id)
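
A hedged sketch of calling upload() programmatically; it assumes the current directory (or dandiset_path) contains a dandiset.yaml, and the file path is hypothetical. With devel_debug=True the paths are processed serially and each pyout record is printed.
upload(
    ["sub-01/sub-01_ecephys.nwb"],
    existing="refresh",
    validation="require",
    dandiset_path=".",
    devel_debug=True,
)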
Example #16
File: upload.py  Project: satra/dandi-cli
def _new_upload(
    api_url,
    dandiset,
    paths,
    existing,
    validation,
    dandiset_path,
    allow_any_path,
    upload_dandiset_metadata,
    devel_debug,
):
    from .dandiapi import DandiAPIClient
    from .dandiset import APIDandiset
    from .support.digests import Digester

    client = DandiAPIClient(api_url)
    client.dandi_authenticate()

    dandiset = APIDandiset(dandiset.path)  # "cast" to a new API based dandiset

    ds_identifier = dandiset.identifier
    # this is a path not a girder id

    if not re.match(dandiset_identifier_regex, str(ds_identifier)):
        raise ValueError(
            f"Dandiset identifier {ds_identifier} does not follow expected "
            f"convention {dandiset_identifier_regex!r}.  Use "
            f"'dandi register' to get a legit identifier")

    from .metadata import nwb2asset
    from .pynwb_utils import ignore_benign_pynwb_warnings
    from .support.pyout import naturalsize
    from .utils import find_dandi_files, find_files, path_is_subpath
    from .validate import validate_file

    ignore_benign_pynwb_warnings()  # so validate doesn't whine

    #
    # Treat paths
    #
    if not paths:
        paths = [dandiset.path]

    # Expand and validate all paths -- they should reside within dandiset
    paths = find_files(".*",
                       paths) if allow_any_path else find_dandi_files(paths)
    paths = list(map(Path, paths))
    npaths = len(paths)
    lgr.info(f"Found {npaths} files to consider")
    for path in paths:
        if not (allow_any_path or path.name == dandiset_metadata_file
                or path.name.endswith(".nwb")):
            raise NotImplementedError(
                f"ATM only .nwb and dandiset.yaml should be in the paths to upload. Got {path}"
            )
        if not path_is_subpath(str(path.absolute()), dandiset.path):
            raise ValueError(f"{path} is not under {dandiset.path}")

    # We will keep a shared set of "being processed" paths so
    # we could limit the number of them until
    #   https://github.com/pyout/pyout/issues/87
    # properly addressed
    process_paths = set()
    from collections import defaultdict

    uploaded_paths = defaultdict(lambda: {"size": 0, "errors": []})

    def skip_file(msg):
        return {"status": "skipped", "message": str(msg)}

    # TODO: we might want to always yield a full record so that a missing
    # field does not cause pyout to halt
    def process_path(path, relpath):
        """

        Parameters
        ----------
        path: Path
          Non Pure (OS specific) Path
        relpath:
          For location on server.  Will be cast to PurePosixPath

        Yields
        ------
        dict
          Records for pyout
        """
        # Ensure consistent types
        path = Path(path)
        relpath = PurePosixPath(relpath)
        try:
            try:
                path_stat = path.stat()
                yield {"size": path_stat.st_size}
            except FileNotFoundError:
                yield skip_file("ERROR: File not found")
                return
            except Exception as exc:
                # without limiting [:50] it might cause some pyout indigestion
                yield skip_file("ERROR: %s" % str(exc)[:50])
                return

            #
            # Compute checksums and possible other digests (e.g. for s3, ipfs - TODO)
            #
            yield {"status": "digesting"}
            try:
                # TODO: in theory we could also cache the result, but since it is
                # critical to get correct checksums, safer to just do it all the time.
                # Should typically be faster than upload itself ;-)
                digester = Digester(["sha256"])
                sha256_digest = digester(path)["sha256"]
            except Exception as exc:
                yield skip_file("failed to compute digests: %s" % str(exc))
                return

            extant = client.get_asset_bypath(ds_identifier, "draft", relpath)
            if extant is not None and extant["sha256"] == sha256_digest:
                if existing == "error":
                    # as promised -- not gentle at all!
                    raise FileExistsError("file exists")
                if existing == "skip":
                    yield skip_file("file exists")
                    return
                # Logic below only for overwrite and reupload
                if existing == "overwrite":
                    if extant["sha256"] == sha256_digest:
                        yield skip_file("file exists")
                        return
                elif existing == "refresh":
                    pass
                elif existing == "force":
                    pass
                else:
                    raise ValueError("existing")

            #
            # Validate first, so we do not bother server at all if not kosher
            #
            # TODO: enable back validation of dandiset.yaml
            if path.name != dandiset_metadata_file and validation != "skip":
                yield {"status": "validating"}
                validation_errors = validate_file(path)
                yield {"errors": len(validation_errors)}
                # TODO: split for dandi, pynwb errors
                if validation_errors:
                    if validation == "require":
                        yield skip_file("failed validation")
                        return
                else:
                    yield {"status": "validated"}
            else:
                # yielding empty causes pyout to get stuck or crash
                # https://github.com/pyout/pyout/issues/91
                # yield {"errors": '',}
                pass

            #
            # Special handling for dandiset.yaml
            # Yarik hates it but that is life for now. TODO
            #
            if path.name == dandiset_metadata_file:
                # TODO This is a temporary measure to avoid breaking web UI
                # dandiset metadata schema assumptions.  All edits should happen
                # online.
                if upload_dandiset_metadata:
                    yield {"status": "updating metadata"}
                    client.set_dandiset_metadata(dandiset.identifier,
                                                 metadata=dandiset.metadata)
                    yield {"status": "updated metadata"}
                else:
                    yield skip_file("should be edited online")
                return

            #
            # Extract metadata - delayed since takes time, but is done before
            # actual upload, so we could skip if this fails
            #
            # Extract metadata before actual upload and skip if fails
            # TODO: allow non-nwb files to skip this step
            # ad-hoc for dandiset.yaml for now
            yield {"status": "extracting metadata"}
            try:
                asset_metadata = nwb2asset(path,
                                           digest=sha256_digest,
                                           digest_type="SHA256")
            except Exception as exc:
                if allow_any_path:
                    yield {"status": "failed to extract metadata"}
                    metadata = {
                        "contentSize": os.path.getsize(path),
                        "digest": sha256_digest,
                        "digest_type": "SHA256",
                        # "encodingFormat": # TODO
                    }
                else:
                    yield skip_file("failed to extract metadata: %s" %
                                    str(exc))
                    return
            else:
                # We need to convert to a `dict` this way instead of with
                # `.dict()` so that enums will be converted to strings.
                metadata = json.loads(
                    asset_metadata.json(exclude_unset=True, exclude_none=True))

            #
            # Upload file
            #
            yield {"status": "uploading"}
            for r in client.iter_upload(ds_identifier, "draft", str(relpath),
                                        metadata, str(path)):
                if r["status"] == "uploading":
                    uploaded_paths[str(path)]["size"] = r["current"]
                yield r
            yield {"status": "done"}

        except Exception as exc:
            if devel_debug:
                raise
            # Custom formatting for some exceptions we know to extract
            # user-meaningful message
            message = str(exc)
            uploaded_paths[str(path)]["errors"].append(message)
            yield {"status": "ERROR", "message": message}
        finally:
            process_paths.remove(str(path))

    # We will again use pyout to provide a neat table summarizing our progress
    # with upload etc
    import pyout

    from .support import pyout as pyouts

    # for the upload speeds we need to provide a custom aggregate
    t0 = time.time()

    def upload_agg(*ignored):
        dt = time.time() - t0
        total = sum(v["size"] for v in uploaded_paths.values())
        if not total:
            return ""
        speed = total / dt if dt else 0
        return "%s/s" % naturalsize(speed)

    pyout_style = pyouts.get_style(hide_if_missing=False)
    pyout_style["upload"]["aggregate"] = upload_agg

    rec_fields = ["path", "size", "errors", "upload", "status", "message"]
    out = pyout.Tabular(style=pyout_style, columns=rec_fields)

    with out, client.session():
        for path in paths:
            while len(process_paths) >= 10:
                lgr.log(2, "Sleep waiting for some paths to finish processing")
                time.sleep(0.5)

            rec = {"path": str(path)}
            process_paths.add(str(path))

            try:
                relpath = path.absolute().relative_to(dandiset.path)

                rec["path"] = str(relpath)
                if devel_debug:
                    # DEBUG: do serially
                    for v in process_path(path, relpath):
                        print(str(v), flush=True)
                else:
                    rec[tuple(rec_fields[1:])] = process_path(path, relpath)
            except ValueError as exc:
                if "does not start with" in str(exc):
                    # if top_path is not the top path for the path
                    # Provide more concise specific message without path details
                    rec.update(skip_file("must be a child of top path"))
                else:
                    rec.update(skip_file(exc))
            out(rec)
Example #17
    def _scan(self):
        """Look through the root folder and compile a flat list of images."""
        if not os.path.exists(self.root_folder):
            raise RuntimeError('%s Folder (%s) does not exist' % (self.CLASS_TAG, self.root_folder))
        for path in sorted(Path(self.root_folder).rglob('*.jpg')):
            self.local_images.append(ProcessedImage(os.path.join(self.root_folder, str(path.absolute()))))
Example #18
def get_free_file(path, bytes=True, max_files=100, force_suffix=False, start_suffix=None):
    """
    Return a file handle to an unused filename. If 'path' is free, return a handle
    to that. Otherwise, append a number to it until a free filename is found or the
    number exceeds 'max_files'. In the latter case, raise 'IOError'.

    Returning a file handle, rather than just a file name, avoids a race
    condition (a file of the same name could otherwise be created between the
    time a free filename is found and the time it is opened).

    Parameters
    ----------
    path: str
        Path name. Can be absolute or relative to the current directory.
    bytes: bool (Default: True)
        (Optional) Specify whether to open the file for byte (True) or plain
        text (False) output. Default is to open for byte output, which
        is suitable for passing to `numpy.save`.
    max_files: int
        (Optional) Maximum allowed number of files with the same name. If this
        number is exceeded, IOError is raised.
    force_suffix: bool (default False)
        (Optional) If True, a suffix '_#', where # is a number, is always added
        to the file name. Forcing suffixes also changes the default value
        of 'start_suffix' to 1.
    start_suffix: int (default 2)
        If creating a file at 'path' is unsuccessful (or 'force_suffix' is
        set to True), this is the first number to try appending to the file name.

    Returns
    -------
    filehandle
        Write-only filehandle, as obtained from a call to
        `open(pathname, mode='xb')`.
    pathname: str
        Pathname (including the possibly appended number) of the opened file.
    """

    # Get a full path
    # TODO: is cwd always what we want here ?
    if isinstance(path, Path):
        pathname = str(path.absolute())
    elif path[0] == '/':
        #path is already a full path name
        pathname = path
    else:
        #Make a full path from path
        pathname = os.path.abspath(path)

    # Set the default value for start_suffix
    if start_suffix is None:
        start_suffix = 1 if force_suffix else 2

    # Set the mode
    if bytes:
        mode = 'xb'
    else:
        mode = 'x'

    # Make sure the directory exists
    os.makedirs(os.path.dirname(pathname), exist_ok=True)

    try:
        if force_suffix:
            raise IOError
        else:
            f = open(pathname, mode=mode)
            return f, pathname
    except IOError:
        name, ext = os.path.splitext(pathname)
        for i in range(start_suffix, max_files+start_suffix):
            appendedname = name + "_" + str(i) + ext
            try:
                f = open(appendedname, mode=mode)
                return f, appendedname
            except IOError:
                continue

        raise IOError("Number of files with the name '{}' has exceeded limit."
                      .format(path))
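
A minimal usage sketch for get_free_file(), assuming numpy is available; the output path and array are hypothetical. The handle is opened in 'xb' mode, so it can be passed straight to numpy.save.
import numpy as np

data = np.arange(10)
f, pathname = get_free_file("results/output.npy", bytes=True)
try:
    np.save(f, data)
finally:
    f.close()
print("wrote", pathname)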
Example #19
def calibrate_checkerboard(board_vid,
                           m_corners,
                           n_corners,
                           framerate=30,
                           do_debug=True):
    """
    Finds internal corners of checkerboards to generate the camera matrix.

    Parameters:
    -----------
    board_vid (str): Path to .mp4 checkerboard video or path to a folder containing 
        checkerboard .jpgs.
    m_corners (int): Number of internal corners along the rows of the checkerboard
    n_corners (int): Number of internal corners along the columns of the checkerboard
    framerate (int): Framerate with which `board_vid` was recorded
    do_debug (bool): If True, will show a live feed of the labelled checkerboards, and
        will save a directory of the labelled checkerboard .jpgs. Default is True. 

    Returns:
    --------
    A dictionary of length 6 that consists of ret, cam_mtx, dist, r_vecs, t_vecs 
    from cv2.calibrateCamera() and the mean reprojection error. Saves this dictionary as
    a pickle file called 'cam_calib_results.pkl'. If this file already exists, running
    this function will read the pickle file and return the contained dictionary. 
    In addition, saves at least a video of the labelled checkerboards.
    """

    board_vid = expanduser(board_vid)
    assert (basename(board_vid) != "checkerboards.mp4"
            ), "Rename 'checkerboards.mp4' to something else!"

    if Path(board_vid).is_file():
        assert (splitext(board_vid)[1] == ".mp4"
                ), "`board_vid` must be an '.mp4' file!"

    output_vid = path.join(dirname(board_vid), "checkerboards.mp4")
    pkl_file = path.join(dirname(board_vid), "cam_calib_results.pkl")

    if do_debug:

        proceed_debug = ask_yes_no(
            f"Debug mode is on, which means the script will actually delete things. Previous {basename(output_vid)} and {basename(pkl_file)} outputs will be deleted. Continue?"
        )

        if proceed_debug:
            boards_dir = path.join(dirname(board_vid), "checkerboards")
        else:
            exit("Quitting ...")

        # Start each debug run with an empty checkerboards directory:
        if Path(boards_dir).is_dir():
            rmtree(boards_dir)
        mkdir(boards_dir)

        if Path(output_vid).is_file():
            Path(output_vid).unlink()

        if Path(pkl_file).is_file():
            Path(pkl_file).unlink()

    if Path(output_vid).is_file() and Path(pkl_file).is_file():

        print(
            f"{basename(output_vid)} already exists at {dirname(output_vid)}")
        print(f"Reading {basename(pkl_file)} from {dirname(pkl_file)} ...")
        cam_calib_results = pickle.load(open(pkl_file, "rb"))
        msg = f"camera matrix: \n{cam_calib_results['cam_mtx']}\n\ndistortion coefficients: \n{cam_calib_results['dist']}\n\nmean reprojection error: \n{cam_calib_results['mean_reproj_error']}\n"
        print(msg)

        return cam_calib_results

    elif not Path(output_vid).is_file() and not Path(pkl_file).is_file():

        # Define the codec:
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")

        # Set up corner-finding:
        # -----------------------
        # Termination criteria:
        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30,
                    0.001)

        # Prepare object points like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0):
        obj_p = np.zeros((n_corners * m_corners, 3), np.float32)
        obj_p[:, :2] = np.mgrid[0:m_corners, 0:n_corners].T.reshape(-1, 2)

        # Arrays to store object points and image points from all the images:
        obj_points = []  # 3d point in real world space
        img_points = []  # 2d points in image plane
        # ------------------------

        i = 0

        if Path(board_vid).is_file():

            cap = cv2.VideoCapture(board_vid)
            out = cv2.VideoWriter(filename=output_vid,
                                  apiPreference=0,
                                  fourcc=fourcc,
                                  fps=int(framerate),
                                  frameSize=(int(cap.get(3)), int(cap.get(4))),
                                  params=None)

            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            pbar = trange(frame_count)

            for f, _ in enumerate(pbar):

                _, frame = cap.read()
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                # Find the checkerboard corners:
                ret, corners = cv2.findChessboardCorners(
                    gray, (m_corners, n_corners), None)

                # If found, add object points, image points (after refining them):
                if ret:

                    obj_points.append(obj_p)

                    # This method increases the accuracy of the identified corners:
                    better_corners = cv2.cornerSubPix(gray, corners, (11, 11),
                                                      (-1, -1), criteria)
                    img_points.append(better_corners)

                    # Draw and display the corners:
                    img = cv2.drawChessboardCorners(frame,
                                                    (m_corners, n_corners),
                                                    better_corners, ret)

                    # Save to video:
                    out.write(img)

                    if do_debug:

                        cv2.imwrite(
                            path.join(boards_dir, f"frame_{i:08d}.jpg"), img)
                        cv2.imshow("checkerboard detected ...", img)
                        if cv2.waitKey(1) & 0xFF == ord("q"):
                            break

                    pbar.set_description(
                        f"Found {i+1} checkerboards in {f+1}/{frame_count} frames"
                    )
                    cv2.waitKey(1)
                    i += 1

            cap.release()

        elif Path(board_vid).is_dir():

            jpgs = [
                str(path.absolute()) for path in Path(board_vid).rglob("*.jpg")
            ]

            if len(jpgs) == 0:
                raise ValueError("No '.jpg' images were found.")

            jpg_shape = get_img_shape(jpgs[0])  # from first image

            out = cv2.VideoWriter(filename=output_vid,
                                  apiPreference=0,
                                  fourcc=fourcc,
                                  fps=int(framerate),
                                  frameSize=(int(jpg_shape[1]),
                                             int(jpg_shape[0])),
                                  params=None)

            pbar = tqdm(jpgs)

            for f, jpg in enumerate(pbar):

                img = cv2.imread(jpg)
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

                # Find the checkerboard corners:
                ret, corners = cv2.findChessboardCorners(
                    gray, (m_corners, n_corners), None)

                # If found, add object points, image points (after refining them):
                if ret:

                    obj_points.append(obj_p)

                    # This method increases the accuracy of the identified corners:
                    better_corners = cv2.cornerSubPix(gray, corners, (11, 11),
                                                      (-1, -1), criteria)
                    img_points.append(better_corners)

                    # Draw and display the corners:
                    img = cv2.drawChessboardCorners(img,
                                                    (m_corners, n_corners),
                                                    better_corners, ret)

                    # Save to video:
                    out.write(img)

                    if do_debug:

                        cv2.imwrite(path.join(boards_dir, basename(jpg)), img)
                        cv2.imshow("checkerboard detected ...", img)
                        if cv2.waitKey(1) & 0xFF == ord("q"):
                            break

                    pbar.set_description(
                        f"Found {i+1} checkerboards in {f+1}/{len(jpgs)} frames"
                    )
                    cv2.waitKey(1)
                    i += 1

        out.release()
        cv2.destroyAllWindows()

        # Calibrate:
        print(
            "Computing camera matrix from calibration data. If many checkerboards were found, will take a long while ..."
        )
        ret, cam_mtx, dist, r_vecs, t_vecs = cv2.calibrateCamera(
            obj_points, img_points, gray.shape[::-1], None, None)

        # Get re-projection error:
        total_reproj_error = 0
        for obj_point, img_point, r_vec, t_vec in zip(obj_points, img_points,
                                                      r_vecs, t_vecs):

            img_points_2, _ = cv2.projectPoints(obj_point, r_vec, t_vec,
                                                cam_mtx, dist)
            error = cv2.norm(img_point, img_points_2,
                             cv2.NORM_L2) / len(img_points_2)
            total_reproj_error += np.abs(error)

        mean_reproj_error = total_reproj_error / len(obj_points)

        # Output:
        msg = f"\ncamera matrix: \n{cam_mtx}\n\ndistortion coefficients: \n{dist}\n\nmean reprojection error: \n{mean_reproj_error}\n"
        print(msg)

        cam_calib_results = {
            "ret": ret,
            "cam_mtx": cam_mtx,
            "dist": dist,
            "r_vecs": r_vecs,
            "t_vecs": t_vecs,
            "mean_reproj_error": mean_reproj_error
        }
        pickle.dump(cam_calib_results, open(pkl_file, "wb"))

        return cam_calib_results

    else:
        exit(
            f"Only one of {basename(output_vid)} or {basename(pkl_file)} exists at {dirname(board_vid)}. \nPlease delete whichever one exists and re-run."
        )
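
A hedged usage sketch mirroring the call in Example #1; the video path and corner counts are placeholders.
results = calibrate_checkerboard("~/videos/checkerboard.mp4",
                                 m_corners=9,
                                 n_corners=6,
                                 framerate=30,
                                 do_debug=False)
cam_mtx, dist = results["cam_mtx"], results["dist"]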
Example #20
File: upload.py  Project: satra/dandi-cli
def upload(
    paths,
    existing="refresh",
    validation="require",
    dandiset_path=None,
    girder_collection=collection_drafts,
    girder_top_folder=None,
    dandi_instance="dandi",
    fake_data=False,  # TODO: not implemented, prune?
    allow_any_path=False,
    upload_dandiset_metadata=False,
    devel_debug=False,
):
    from .dandiset import Dandiset
    from . import girder
    from .support.digests import Digester

    dandiset = Dandiset.find(dandiset_path)
    if not dandiset:
        raise RuntimeError(
            f"Found no {dandiset_metadata_file} anywhere.  "
            "Use 'dandi register', 'download', or 'organize' first")

    # Should no longer be needed
    # dandiset_path = Path(dandiset_path).resolve()

    instance = get_instance(dandi_instance)
    if instance.girder is None:
        assert instance.api is not None
        return _new_upload(
            instance.api,
            dandiset,
            paths,
            existing,
            validation,
            dandiset_path,
            allow_any_path,
            upload_dandiset_metadata,
            devel_debug,
        )

    if upload_dandiset_metadata:
        raise NotImplementedError(
            "Upload of dandiset metadata to Girder based server is not supported."
        )

    client = girder.get_client(instance.girder)

    # Girder side details:

    if not girder_collection:
        girder_collection = collection_drafts

    if not girder_top_folder:
        # We upload to staging/dandiset_id
        ds_identifier = dandiset.identifier
        if not ds_identifier:
            raise ValueError(
                "No 'identifier' set for the dandiset yet.  Use 'dandi register'"
            )
        if not re.match(dandiset_identifier_regex, ds_identifier):
            raise ValueError(
                f"Dandiset identifier {ds_identifier} does not follow expected "
                f"convention {dandiset_identifier_regex!r}.  Use "
                f"'dandi register' to get a legit identifier")
        # this is a path not a girder id
        girder_top_folder = ds_identifier
    girder_top_folder = PurePosixPath(girder_top_folder)

    if str(girder_top_folder) in (".", "..", "", "/"):
        raise ValueError(
            f"Got folder {girder_top_folder}, but files cannot be uploaded "
            f"into a collection directly.")

    import multiprocessing

    from .metadata import get_metadata
    from .pynwb_utils import get_object_id, ignore_benign_pynwb_warnings
    from .support.generatorify import generator_from_callback
    from .support.pyout import naturalsize
    from .utils import find_dandi_files, find_files, path_is_subpath
    from .validate import validate_file

    ignore_benign_pynwb_warnings()  # so validate doesn't whine

    try:
        collection_rec = girder.ensure_collection(client, girder_collection)
    except girder.gcl.HttpError as exc:
        if devel_debug:
            raise
        # provide a bit less intimidating error reporting
        lgr.error(
            "Failed to assure presence of the %s collection: %s",
            girder_collection,
            (girder.get_HttpError_response(exc)
             or {}).get("message", str(exc)),
        )
        sys.exit(1)

    lgr.debug("Working with collection %s", collection_rec)

    try:
        girder.lookup(client, girder_collection, path=girder_top_folder)
    except girder.GirderNotFound:
        raise ValueError(
            f"There is no {girder_top_folder} in {girder_collection}. "
            f"Did you use 'dandi register'?")

    #
    # Treat paths
    #
    if not paths:
        paths = [dandiset.path]

    # Expand and validate all paths -- they should reside within dandiset
    paths = find_files(".*",
                       paths) if allow_any_path else find_dandi_files(paths)
    paths = list(map(Path, paths))
    npaths = len(paths)
    lgr.info(f"Found {npaths} files to consider")
    for path in paths:
        if not (allow_any_path or path.name == dandiset_metadata_file
                or path.name.endswith(".nwb")):
            raise NotImplementedError(
                f"ATM only .nwb and dandiset.yaml should be in the paths to upload. Got {path}"
            )
        if not path_is_subpath(str(path.absolute()), dandiset.path):
            raise ValueError(f"{path} is not under {dandiset.path}")

    # We will keep a shared set of "being processed" paths so
    # we could limit the number of them until
    #   https://github.com/pyout/pyout/issues/87
    # properly addressed
    process_paths = set()
    from collections import defaultdict

    uploaded_paths = defaultdict(lambda: {"size": 0, "errors": []})

    def skip_file(msg):
        return {"status": "skipped", "message": str(msg)}

    lock = multiprocessing.Lock()

    # TODO: we might want to always yield a full record so that a missing
    # field does not cause pyout to halt
    def process_path(path, relpath):
        """

        Parameters
        ----------
        path: Path
          Non Pure (OS specific) Path
        relpath:
          For location on Girder.  Will be cast to PurePosixPath

        Yields
        ------
        dict
          Records for pyout
        """
        # Ensure consistent types
        path = Path(path)
        relpath = PurePosixPath(relpath)
        try:
            try:
                path_stat = path.stat()
                yield {"size": path_stat.st_size}
            except FileNotFoundError:
                yield skip_file("ERROR: File not found")
                return
            except Exception as exc:
                # without limiting [:50] it might cause some pyout indigestion
                yield skip_file("ERROR: %s" % str(exc)[:50])
                return

            yield {"status": "checking girder"}

            girder_folder = girder_top_folder / relpath.parent

            # we will add some fields which would help us with deciding to
            # reupload or not
            file_metadata_ = {
                "uploaded_size": path_stat.st_size,
                "uploaded_mtime": ensure_strtime(path_stat.st_mtime),
                # "uploaded_date": None,  # to be filled out upon upload completion
            }

            # A girder delete API target to .delete before uploading a file
            # (e.g. if decided to reupload)
            delete_before_upload = None

            def ensure_item():
                """This function might need to be called twice, e.g. if we
                are to reupload the entire item.

                ATM new versions of the files would create new items since
                the policy is one File per Item
                """
                try:
                    lock.acquire(timeout=60)
                    # TODO: we need to make this all thread safe all the way
                    #       until uploading the file since multiple threads would
                    #       create multiple
                    # ATM it even fails with  No such folder: 5e33658d6eb14e0bf49e97d5",
                    # so will first upload one file and then the rest... not sure why
                    # locking doesn't work
                    folder_rec = girder.ensure_folder(client, collection_rec,
                                                      girder_collection,
                                                      girder_folder)

                    # Get (if already exists) or create an item
                    item_rec = client.createItem(folder_rec["_id"],
                                                 name=relpath.name,
                                                 reuseExisting=True)
                finally:
                    lock.release()
                return item_rec

            def ensure_folder():
                try:
                    lock.acquire(timeout=60)
                    folder_rec = girder.ensure_folder(client, collection_rec,
                                                      girder_collection,
                                                      girder_folder)
                finally:
                    lock.release()
                return folder_rec

            #
            # 1. Validate first, so we do not bother girder at all if not kosher
            #
            # TODO: enable back validation of dandiset.yaml
            if path.name != dandiset_metadata_file and validation != "skip":
                yield {"status": "validating"}
                validation_errors = validate_file(path)
                yield {"errors": len(validation_errors)}
                # TODO: split for dandi, pynwb errors
                if validation_errors:
                    if validation == "require":
                        yield skip_file("failed validation")
                        return
                else:
                    yield {"status": "validated"}
            else:
                # yielding empty causes pyout to get stuck or crash
                # https://github.com/pyout/pyout/issues/91
                # yield {"errors": '',}
                pass

            #
            # Special handling for dandiset.yaml
            # Yarik hates it but that is life for now. TODO
            #
            if path.name == dandiset_metadata_file:
                # TODO This is a temporary measure to avoid breaking web UI
                # dandiset metadata schema assumptions.  All edits should happen
                # online.
                yield skip_file("should be edited online")
                return
                # We need to upload its content as metadata for the entire
                # folder.
                folder_rec = ensure_folder()
                remote_metadata = folder_rec["meta"]
                if remote_metadata.get("dandiset", {}) == dandiset.metadata:
                    yield skip_file("exists (same)")
                else:
                    remote_metadata["dandiset"] = dandiset.metadata
                    yield {"status": "uploading dandiset metadata"}
                    client.addMetadataToFolder(folder_rec["_id"],
                                               remote_metadata)
                    yield {"status": "done"}
                # Interrupt -- no file to upload
                return

            #
            # 2. Ensure having an item
            #
            item_rec = ensure_item()

            #
            # 3. Analyze possibly present on the remote files in the item
            #
            file_recs = list(client.listFile(item_rec["_id"]))

            # get metadata and if we have all indications that it is
            # probably the same -- we just skip
            stat_fields = [
                # Care only about mtime, ignore ctime which could change
                "uploaded_mtime",
                "uploaded_size",
            ]
            assert sorted(file_metadata_) == stat_fields
            item_file_metadata_ = {
                k: item_rec.get("meta", {}).get(k, None)
                for k in stat_fields
            }
            lgr.debug(
                "Files meta: local file: %s  remote file: %s",
                file_metadata_,
                item_file_metadata_,
            )

            if item_file_metadata_["uploaded_mtime"]:
                local_mtime = ensure_datetime(file_metadata_["uploaded_mtime"])
                remote_mtime = ensure_datetime(
                    item_file_metadata_.get("uploaded_mtime"))
                remote_file_status = (
                    "same" if (file_metadata_ == item_file_metadata_) else
                    ("newer" if remote_mtime > local_mtime else
                     ("older" if remote_mtime < local_mtime else "diff")))
            else:
                remote_file_status = "no mtime"
            exists_msg = f"exists ({remote_file_status})"

            if file_recs:  # there is a file already
                if len(file_recs) > 1:
                    lgr.debug(
                        f"Item {item_rec} contains multiple files: {file_recs}"
                    )
                if existing == "error":
                    # as promised -- not gentle at all!
                    raise FileExistsError(exists_msg)
                if existing == "skip":
                    yield skip_file(exists_msg)
                    return
                # Logic below applies only to overwrite, refresh, and force
                if existing == "overwrite":
                    if remote_file_status == "same":
                        yield skip_file(exists_msg)
                        return
                elif existing == "refresh":
                    if remote_file_status != "older":
                        yield skip_file(exists_msg)
                        return
                elif existing == "force":
                    pass
                else:
                    raise ValueError("existing")

                delete_before_upload = f'/item/{item_rec["_id"]}'

                yield {"message": exists_msg + " - reuploading"}

            #
            # 4. Extract metadata - delayed since takes time, but is done
            #    before actual upload, so we could skip if this fails
            #
            # Extract metadata before the actual upload and skip if it fails
            # TODO: allow for non-nwb files to skip this step
            # ad-hoc for dandiset.yaml for now
            if path.name != dandiset_metadata_file:
                yield {"status": "extracting metadata"}
                try:
                    metadata = get_metadata(path)
                except Exception as exc:
                    if allow_any_path:
                        yield {"status": "failed to extract metadata"}
                        metadata = {}
                    else:
                        yield skip_file("failed to extract metadata: %s" %
                                        str(exc))
                        if not file_recs:
                            # remove empty item
                            yield {"status": "deleting empty item"}
                            client.delete(f'/item/{item_rec["_id"]}')
                            yield {"status": "deleted empty item"}
                        return

            #
            # ?. Compute checksums and possibly other digests (e.g. for s3, ipfs - TODO)
            #
            yield {"status": "digesting"}
            try:
                # TODO: in theory we could also cache the result, but since it is
                # critical to get correct checksums, safer to just do it all the time.
                # Should typically be faster than upload itself ;-)
                digester = Digester(metadata_digests)
                file_metadata_.update(digester(path))
            except Exception as exc:
                yield skip_file("failed to compute digests: %s" % str(exc))
                return

            #
            # 5. Upload file
            #
            # TODO: we could potentially keep new item "hidden" until we are
            #  done with upload, and only then remove old one and replace with
            #  a new one (rename from "hidden" name).
            if delete_before_upload:
                yield {"status": "deleting old"}
                client.delete(delete_before_upload)
                yield {"status": "old deleted"}
                # create a new item
                item_rec = ensure_item()

            yield {"status": "uploading"}
            # Upload file to an item
            # XXX TODO progress reporting back to pyout is actually tricky,
            #     if at all possible, to implement via callback, since the
            #     callback would need to yield somehow from the context here.
            #     yoh doesn't see how that could be done yet. In the worst
            #     case we would copy uploadFileToItem and _uploadContents
            #     and make them into generators to relay progress instead of
            #     via callback
            # https://stackoverflow.com/questions/9968592/turn-functions-with-a-callback-into-python-generators
            # has some solutions but all IMHO are a bit too complex

            for r in generator_from_callback(lambda c: client.uploadFileToItem(
                    item_rec["_id"], str(path), progressCallback=c)):
                upload_perc = 100 * (
                    (r["current"] / r["total"]) if r["total"] else 1.0)
                if girder._DANDI_LOG_GIRDER:
                    girder.lgr.debug(
                        "PROGRESS[%s]: done=%d %%done=%s",
                        str(path),
                        r["current"],
                        upload_perc,
                    )
                uploaded_paths[str(path)]["size"] = r["current"]
                yield {"upload": upload_perc}

            # Get uploaded file id
            file_id, current = client.isFileCurrent(item_rec["_id"], path.name,
                                                    path.absolute())
            if not current:
                yield skip_file("File on server was unexpectedly changed")
                return

            # Compare file size against what download headers report
            # S3 doesn't seem to allow HEAD requests, so we need to instead do
            # a GET with a streaming response and not read the body.
            with client.sendRestRequest("GET",
                                        f"file/{file_id}/download",
                                        jsonResp=False,
                                        stream=True) as r:
                if int(r.headers["Content-Length"]) != path.stat().st_size:
                    yield skip_file(
                        "File size on server does not match local file")
                    return

            #
            # 6. Upload metadata
            #
            metadata_ = {}
            for k, v in metadata.items():
                if v in ("", None):
                    continue  # degenerate, why bother
                # XXX TODO: remove this -- it is only temporary; search should
                # handle case-insensitive matching itself
                if isinstance(v, str):
                    metadata_[k] = v.lower()
                elif isinstance(v, datetime):
                    metadata_[k] = ensure_strtime(v)
            # we will add some fields which would help us decide whether to
            # reupload or not.
            # .isoformat() would give an ISO 8601 representation, which is what
            # girder already shows, e.g.
            #   session_start_time   1971-01-01 12:00:00+00:00
            # decided to go for .isoformat for internal consistency -- let's see
            file_metadata_["uploaded_datetime"] = ensure_strtime(time.time())
            metadata_.update(file_metadata_)
            metadata_["uploaded_size"] = path_stat.st_size
            metadata_["uploaded_mtime"] = ensure_strtime(path_stat.st_mtime)
            metadata_["uploaded_by"] = "dandi %s" % __version__
            # Also store object_id for the file to help identify changes/moves
            try:
                metadata_["uploaded_nwb_object_id"] = get_object_id(str(path))
            except Exception as exc:
                (lgr.debug if allow_any_path else lgr.warning)(
                    "Failed to read object_id: %s", exc)

            # #
            # # 7. Also set remote file ctime to match local mtime
            # #   since for type "file", Resource has no "updated" field.
            # #   and this could help us identify changes being done
            # #   to the remote file -- if metadata["uploaded_mtime"]
            # #   differs
            # yield {"status": "setting remote file timestamp"}
            # try:
            #     client.setResourceTimestamp(
            #         file_id, type="file", created=metadata_["uploaded_mtime"]
            #     )
            # except girder.gcl.HttpError as exc:
            #     if devel_debug:
            #         raise
            #     response = girder.get_HttpError_response(exc)
            #     message = response.get("message", str(exc))
            #     yield {"status": "WARNING", "message": message}

            # 7. Upload metadata
            yield {"status": "uploading metadata"}
            client.addMetadataToItem(item_rec["_id"], metadata_)
            yield {"status": "done"}

        except Exception as exc:
            if devel_debug:
                raise
            # Custom formatting for some exceptions we know to extract
            # user-meaningful message
            message = str(exc)
            if isinstance(exc, girder.gcl.HttpError):
                response = girder.get_HttpError_response(exc)
                if "message" in response:
                    message = response["message"]
            uploaded_paths[str(path)]["errors"].append(message)
            yield {"status": "ERROR", "message": message}
        finally:
            process_paths.remove(str(path))

    # We will again use pyout to provide a neat table summarizing our progress
    # with upload etc
    import pyout

    from .support import pyout as pyouts

    # for the upload speeds we need to provide a custom aggregate
    t0 = time.time()

    def upload_agg(*ignored):
        dt = time.time() - t0
        total = sum(v["size"] for v in uploaded_paths.values())
        if not total:
            return ""
        speed = total / dt if dt else 0
        return "%s/s" % naturalsize(speed)

    pyout_style = pyouts.get_style(hide_if_missing=False)
    pyout_style["upload"]["aggregate"] = upload_agg

    rec_fields = ["path", "size", "errors", "upload", "status", "message"]
    out = pyout.Tabular(style=pyout_style, columns=rec_fields)

    with out, client.lock_dandiset(dandiset.identifier):
        for path in paths:
            while len(process_paths) >= 10:
                lgr.log(2, "Sleep waiting for some paths to finish processing")
                time.sleep(0.5)

            rec = {"path": str(path)}
            process_paths.add(str(path))

            try:
                relpath = path.absolute().relative_to(dandiset.path)

                rec["path"] = str(relpath)
                if devel_debug:
                    # DEBUG: do serially
                    for v in process_path(path, relpath):
                        print(str(v), flush=True)
                else:
                    rec[tuple(rec_fields[1:])] = process_path(path, relpath)
            except ValueError as exc:
                if "does not start with" in str(exc):
                    # raised when the path is not underneath dandiset.path;
                    # provide a more concise, specific message without path details
                    rec.update(skip_file("must be a child of top path"))
                else:
                    rec.update(skip_file(exc))
            out(rec)
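
The XXX TODO comment in the example above points at the general problem of adapting a callback-based API into a generator. Below is a minimal, hypothetical sketch of how a helper such as generator_from_callback could be built with a worker thread and a queue; it is an illustration under assumptions, not dandi's actual implementation.

# Hypothetical sketch only -- not dandi's actual generator_from_callback.
import queue
import threading

def generator_from_callback(func):
    """Run ``func(callback)`` in a worker thread and yield every payload
    the callback receives, until ``func`` returns."""
    q = queue.Queue()
    _done = object()  # sentinel marking the end of the stream

    def _runner():
        try:
            func(q.put)   # each progress callback payload lands on the queue
        finally:
            q.put(_done)  # always unblock the consumer, even on error

    threading.Thread(target=_runner, daemon=True).start()
    while True:
        item = q.get()
        if item is _done:
            return
        yield item
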
Example #21
 def __iter__(self):
     for path in self._path.iterdir():
         if not path.match('.*') and path.is_file():
             yield Image.open(str(path.absolute()))
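
For context, self._path is not shown in the snippet above; a minimal wrapper class it could belong to might look like the following sketch (the class name and constructor are assumptions, not part of the source):

# Hypothetical wrapper; only the __iter__ body comes from the example above.
import pathlib
from PIL import Image

class ImageDirectory:
    def __init__(self, directory):
        self._path = pathlib.Path(directory)

    def __iter__(self):
        for path in self._path.iterdir():
            # skip dotfiles and directories, yield opened images
            if not path.match('.*') and path.is_file():
                yield Image.open(str(path.absolute()))
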
Example #22
 def validate_disk_file(parser: argparse.ArgumentParser, arg: str) -> str:
     path: pathlib.Path = pathlib.Path(arg)
     abs_file_path: str = str(path.absolute().as_posix())
     return abs_file_path
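
A validator like the one above is typically plugged into argparse through the type= hook; the sketch below shows one hedged way to wire it up (the option name and sample value are made up for illustration, not taken from the source):

# Illustrative wiring only; the option name and sample value are assumptions.
import argparse
import functools
import pathlib

def validate_disk_file(parser: argparse.ArgumentParser, arg: str) -> str:
    path = pathlib.Path(arg)
    return str(path.absolute().as_posix())

parser = argparse.ArgumentParser()
parser.add_argument(
    "--disk-file",
    type=functools.partial(validate_disk_file, parser),
    help="path to a disk file; normalized to an absolute POSIX path",
)
args = parser.parse_args(["--disk-file", "disk.img"])
print(args.disk_file)
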
Example #23
 def _open(self, article: Optional[Article]):
     if article:
         path = self.article_path(article)
     else:
         path = self.index_path(self.project.articles.root)
     webbrowser.open(path.absolute().as_uri())
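
Path.as_uri() only works on absolute paths (it raises ValueError otherwise), which is why the snippet calls .absolute() first. A minimal standalone illustration, with a made-up file name, follows:

# Minimal illustration; "article.html" is a hypothetical file name.
import pathlib
import webbrowser

path = pathlib.Path("article.html")
# as_uri() would raise ValueError on a relative path, so absolutize first
webbrowser.open(path.absolute().as_uri())
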
Example #25
File: upload.py  Project: satra/dandi-cli
    def process_path(path, relpath):
        """

        Parameters
        ----------
        path: Path
          Non Pure (OS specific) Path
        relpath:
          For location on Girder.  Will be cast to PurePosixPath

        Yields
        ------
        dict
          Records for pyout
        """
        # Ensure consistent types
        path = Path(path)
        relpath = PurePosixPath(relpath)
        try:
            try:
                path_stat = path.stat()
                yield {"size": path_stat.st_size}
            except FileNotFoundError:
                yield skip_file("ERROR: File not found")
                return
            except Exception as exc:
                # without limiting [:50] it might cause some pyout indigestion
                yield skip_file("ERROR: %s" % str(exc)[:50])
                return

            yield {"status": "checking girder"}

            girder_folder = girder_top_folder / relpath.parent

            # we will add some fields which would help us with deciding to
            # reupload or not
            file_metadata_ = {
                "uploaded_size": path_stat.st_size,
                "uploaded_mtime": ensure_strtime(path_stat.st_mtime),
                # "uploaded_date": None,  # to be filled out upon upload completion
            }

            # A girder delete API target to .delete before uploading a file
            # (e.g. if decided to reupload)
            delete_before_upload = None

            def ensure_item():
                """This function might need to be called twice, e.g. if we
                are to reupload the entire item.

                ATM new versions of the files would create new items since
                the policy is one File per Item
                """
                try:
                    lock.acquire(timeout=60)
                    # TODO: we need to make this all thread safe all the way
                    #       until uploading the file, since multiple threads would
                    #       create multiple folders/items
                    # ATM it even fails with "No such folder: 5e33658d6eb14e0bf49e97d5",
                    # so will first upload one file and then the rest... not sure why
                    # locking doesn't work
                    folder_rec = girder.ensure_folder(client, collection_rec,
                                                      girder_collection,
                                                      girder_folder)

                    # Get (if already exists) or create an item
                    item_rec = client.createItem(folder_rec["_id"],
                                                 name=relpath.name,
                                                 reuseExisting=True)
                finally:
                    lock.release()
                return item_rec

            def ensure_folder():
                try:
                    lock.acquire(timeout=60)
                    folder_rec = girder.ensure_folder(client, collection_rec,
                                                      girder_collection,
                                                      girder_folder)
                finally:
                    lock.release()
                return folder_rec

            #
            # 1. Validate first, so we do not bother girder at all if not kosher
            #
            # TODO: enable back validation of dandiset.yaml
            if path.name != dandiset_metadata_file and validation != "skip":
                yield {"status": "validating"}
                validation_errors = validate_file(path)
                yield {"errors": len(validation_errors)}
                # TODO: split for dandi, pynwb errors
                if validation_errors:
                    if validation == "require":
                        yield skip_file("failed validation")
                        return
                else:
                    yield {"status": "validated"}
            else:
                # yielding empty causes pyout to get stuck or crash
                # https://github.com/pyout/pyout/issues/91
                # yield {"errors": '',}
                pass

            #
            # Special handling for dandiset.yaml
            # Yarik hates it but that is life for now. TODO
            #
            if path.name == dandiset_metadata_file:
                # TODO This is a temporary measure to avoid breaking web UI
                # dandiset metadata schema assumptions.  All edits should happen
                # online.
                yield skip_file("should be edited online")
                return
                # We need to upload its content as metadata for the entire
                # folder.
                folder_rec = ensure_folder()
                remote_metadata = folder_rec["meta"]
                if remote_metadata.get("dandiset", {}) == dandiset.metadata:
                    yield skip_file("exists (same)")
                else:
                    remote_metadata["dandiset"] = dandiset.metadata
                    yield {"status": "uploading dandiset metadata"}
                    client.addMetadataToFolder(folder_rec["_id"],
                                               remote_metadata)
                    yield {"status": "done"}
                # Interrupt -- no file to upload
                return

            #
            # 2. Ensure having an item
            #
            item_rec = ensure_item()

            #
            # 3. Analyze files possibly already present remotely in the item
            #
            file_recs = list(client.listFile(item_rec["_id"]))

            # get metadata and, if all indications are that it is probably
            # the same, just skip
            stat_fields = [
                # Care only about mtime, ignore ctime which could change
                "uploaded_mtime",
                "uploaded_size",
            ]
            assert sorted(file_metadata_) == stat_fields
            item_file_metadata_ = {
                k: item_rec.get("meta", {}).get(k, None)
                for k in stat_fields
            }
            lgr.debug(
                "Files meta: local file: %s  remote file: %s",
                file_metadata_,
                item_file_metadata_,
            )

            if item_file_metadata_["uploaded_mtime"]:
                local_mtime = ensure_datetime(file_metadata_["uploaded_mtime"])
                remote_mtime = ensure_datetime(
                    item_file_metadata_.get("uploaded_mtime"))
                remote_file_status = (
                    "same" if (file_metadata_ == item_file_metadata_) else
                    ("newer" if remote_mtime > local_mtime else
                     ("older" if remote_mtime < local_mtime else "diff")))
            else:
                remote_file_status = "no mtime"
            exists_msg = f"exists ({remote_file_status})"

            if file_recs:  # there is a file already
                if len(file_recs) > 1:
                    lgr.debug(
                        f"Item {item_rec} contains multiple files: {file_recs}"
                    )
                if existing == "error":
                    # as promised -- not gentle at all!
                    raise FileExistsError(exists_msg)
                if existing == "skip":
                    yield skip_file(exists_msg)
                    return
                # Logic below applies only to overwrite, refresh, and force
                if existing == "overwrite":
                    if remote_file_status == "same":
                        yield skip_file(exists_msg)
                        return
                elif existing == "refresh":
                    if remote_file_status != "older":
                        yield skip_file(exists_msg)
                        return
                elif existing == "force":
                    pass
                else:
                    raise ValueError("existing")

                delete_before_upload = f'/item/{item_rec["_id"]}'

                yield {"message": exists_msg + " - reuploading"}

            #
            # 4. Extract metadata - delayed since takes time, but is done
            #    before actual upload, so we could skip if this fails
            #
            # Extract metadata before the actual upload and skip if it fails
            # TODO: allow for non-nwb files to skip this step
            # ad-hoc for dandiset.yaml for now
            if path.name != dandiset_metadata_file:
                yield {"status": "extracting metadata"}
                try:
                    metadata = get_metadata(path)
                except Exception as exc:
                    if allow_any_path:
                        yield {"status": "failed to extract metadata"}
                        metadata = {}
                    else:
                        yield skip_file("failed to extract metadata: %s" %
                                        str(exc))
                        if not file_recs:
                            # remove empty item
                            yield {"status": "deleting empty item"}
                            client.delete(f'/item/{item_rec["_id"]}')
                            yield {"status": "deleted empty item"}
                        return

            #
            # ?. Compute checksums and possibly other digests (e.g. for s3, ipfs - TODO)
            #
            yield {"status": "digesting"}
            try:
                # TODO: in theory we could also cache the result, but since it is
                # critical to get correct checksums, safer to just do it all the time.
                # Should typically be faster than upload itself ;-)
                digester = Digester(metadata_digests)
                file_metadata_.update(digester(path))
            except Exception as exc:
                yield skip_file("failed to compute digests: %s" % str(exc))
                return

            #
            # 5. Upload file
            #
            # TODO: we could potentially keep new item "hidden" until we are
            #  done with upload, and only then remove old one and replace with
            #  a new one (rename from "hidden" name).
            if delete_before_upload:
                yield {"status": "deleting old"}
                client.delete(delete_before_upload)
                yield {"status": "old deleted"}
                # create a new item
                item_rec = ensure_item()

            yield {"status": "uploading"}
            # Upload file to an item
            # XXX TODO progress reporting back to pyout is actually tricky,
            #     if at all possible, to implement via callback, since the
            #     callback would need to yield somehow from the context here.
            #     yoh doesn't see how that could be done yet. In the worst
            #     case we would copy uploadFileToItem and _uploadContents
            #     and make them into generators to relay progress instead of
            #     via callback
            # https://stackoverflow.com/questions/9968592/turn-functions-with-a-callback-into-python-generators
            # has some solutions but all IMHO are a bit too complex

            for r in generator_from_callback(lambda c: client.uploadFileToItem(
                    item_rec["_id"], str(path), progressCallback=c)):
                upload_perc = 100 * (
                    (r["current"] / r["total"]) if r["total"] else 1.0)
                if girder._DANDI_LOG_GIRDER:
                    girder.lgr.debug(
                        "PROGRESS[%s]: done=%d %%done=%s",
                        str(path),
                        r["current"],
                        upload_perc,
                    )
                uploaded_paths[str(path)]["size"] = r["current"]
                yield {"upload": upload_perc}

            # Get uploaded file id
            file_id, current = client.isFileCurrent(item_rec["_id"], path.name,
                                                    path.absolute())
            if not current:
                yield skip_file("File on server was unexpectedly changed")
                return

            # Compare file size against what download headers report
            # S3 doesn't seem to allow HEAD requests, so we need to instead do
            # a GET with a streaming response and not read the body.
            with client.sendRestRequest("GET",
                                        f"file/{file_id}/download",
                                        jsonResp=False,
                                        stream=True) as r:
                if int(r.headers["Content-Length"]) != path.stat().st_size:
                    yield skip_file(
                        "File size on server does not match local file")
                    return

            #
            # 6. Upload metadata
            #
            metadata_ = {}
            for k, v in metadata.items():
                if v in ("", None):
                    continue  # degenerate, why bother
                # XXX TODO: remove this -- it is only temporary; search should
                # handle case-insensitive matching itself
                if isinstance(v, str):
                    metadata_[k] = v.lower()
                elif isinstance(v, datetime):
                    metadata_[k] = ensure_strtime(v)
            # we will add some fields which would help us decide whether to
            # reupload or not.
            # .isoformat() would give an ISO 8601 representation, which is what
            # girder already shows, e.g.
            #   session_start_time   1971-01-01 12:00:00+00:00
            # decided to go for .isoformat for internal consistency -- let's see
            file_metadata_["uploaded_datetime"] = ensure_strtime(time.time())
            metadata_.update(file_metadata_)
            metadata_["uploaded_size"] = path_stat.st_size
            metadata_["uploaded_mtime"] = ensure_strtime(path_stat.st_mtime)
            metadata_["uploaded_by"] = "dandi %s" % __version__
            # Also store object_id for the file to help identify changes/moves
            try:
                metadata_["uploaded_nwb_object_id"] = get_object_id(str(path))
            except Exception as exc:
                (lgr.debug if allow_any_path else lgr.warning)(
                    "Failed to read object_id: %s", exc)

            # #
            # # 7. Also set remote file ctime to match local mtime
            # #   since for type "file", Resource has no "updated" field.
            # #   and this could help us identify changes being done
            # #   to the remote file -- if metadata["uploaded_mtime"]
            # #   differs
            # yield {"status": "setting remote file timestamp"}
            # try:
            #     client.setResourceTimestamp(
            #         file_id, type="file", created=metadata_["uploaded_mtime"]
            #     )
            # except girder.gcl.HttpError as exc:
            #     if devel_debug:
            #         raise
            #     response = girder.get_HttpError_response(exc)
            #     message = response.get("message", str(exc))
            #     yield {"status": "WARNING", "message": message}

            # 7. Upload metadata
            yield {"status": "uploading metadata"}
            client.addMetadataToItem(item_rec["_id"], metadata_)
            yield {"status": "done"}

        except Exception as exc:
            if devel_debug:
                raise
            # Custom formatting for some exceptions we know to extract
            # user-meaningful message
            message = str(exc)
            if isinstance(exc, girder.gcl.HttpError):
                response = girder.get_HttpError_response(exc)
                if "message" in response:
                    message = response["message"]
            uploaded_paths[str(path)]["errors"].append(message)
            yield {"status": "ERROR", "message": message}
        finally:
            process_paths.remove(str(path))
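
The size-verification step in the example streams a GET response and reads only the headers, never the body. A standalone sketch of the same idea using requests directly (the function name and URL handling are assumptions for illustration, not part of dandi-cli) could look like this:

# Standalone sketch of the streaming size check; names here are illustrative.
import pathlib
import requests

def remote_size_matches(url: str, local_path: pathlib.Path) -> bool:
    # stream=True keeps the body unread; leaving the ``with`` block closes
    # the connection without downloading the file
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        return int(r.headers["Content-Length"]) == local_path.stat().st_size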