Example #1
def text_dandiset(
        dandi_client: DandiAPIClient,
        tmp_path_factory: pytest.TempPathFactory) -> Iterator[Dict[str, Any]]:
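    """
    Create a test Dandiset, populate a temporary directory with sample text
    files, upload them to the dandi-staging instance, and yield everything a
    test needs (client, local path, Dandiset, identifier, and a re-upload
    callable).  On teardown, all published versions and the Dandiset itself
    are deleted.
    """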
    d = dandi_client.create_dandiset(
        "Dandiset for testing backups2datalad",
        {
            "schemaKey":
            "Dandiset",
            "name":
            "Dandiset for testing backups2datalad",
            "description":
            "A test text Dandiset",
            "contributor":
            [{
                "schemaKey": "Person",
                "name": "Wodder, John",
                "roleName": ["dcite:Author", "dcite:ContactPerson"],
            }],
            "license": ["spdx:CC0-1.0"],
            "manifestLocation": ["https://github.com/dandi/dandi-cli"],
        },
    )
    dandiset_id = d.identifier
    dspath = tmp_path_factory.mktemp("text_dandiset")
    (dspath / dandiset_metadata_file).write_text(f"identifier: '{dandiset_id}'\n")
    (dspath / "file.txt").write_text("This is test text.\n")
    (dspath / "v0.txt").write_text("Version 0\n")
    (dspath / "subdir1").mkdir()
    (dspath / "subdir1" / "apple.txt").write_text("Apple\n")
    (dspath / "subdir2").mkdir()
    (dspath / "subdir2" / "banana.txt").write_text("Banana\n")
    (dspath / "subdir2" / "coconut.txt").write_text("Coconut\n")

    def upload_dandiset(paths: Optional[List[str]] = None,
                        **kwargs: Any) -> None:
        upload(
            paths=paths or [dspath],
            dandi_instance="dandi-staging",
            devel_debug=True,
            allow_any_path=True,
            validation="skip",
            **kwargs,
        )

    try:
        upload_dandiset()
        yield {
            "client": dandi_client,
            "dspath": dspath,
            "dandiset": d,
            "dandiset_id": dandiset_id,
            "reupload": upload_dandiset,
        }
    finally:
        for v in d.get_versions():
            if v.identifier != "draft":
                dandi_client.delete(f"{d.api_path}versions/{v.identifier}/")
        d.delete()
Example #2
def clabel_table_create(
    common_acts, n_parts=12, data_lp="/data2/users/stepeter/files_nwb/downloads/000055/"
):
    """Create table of coarse label durations across participants.
    Labels to include in the table are specified by common_acts."""
    with DandiAPIClient() as client:
        paths = []
        for file in client.get_dandiset("000055", "draft").get_assets_with_path_prefix(""):
            paths.append(file.path)
    paths = natsort.natsorted(paths)

    vals_all = np.zeros([n_parts, len(common_acts) + 1])
    for part_ind in tqdm(range(n_parts)):
        fids = [val for val in paths if "sub-" + str(part_ind + 1).zfill(2) in val]
        for fid in fids:
            with DandiAPIClient() as client:
                asset = client.get_dandiset("000055", "draft").get_asset_by_path(fid)
                s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)
            with NWBHDF5IO(s3_path, mode="r", driver="ros3") as io:
                nwb = io.read()

                curr_labels = nwb.intervals["epochs"].to_dataframe()
                durations = (
                    curr_labels.loc[:, "stop_time"].values
                    - curr_labels.loc[:, "start_time"].values
                )

                # Add up durations of each label
                for s, curr_act in enumerate(common_acts):
                    for i, curr_label in enumerate(curr_labels["labels"].tolist()):
                        if curr_act in curr_label.split(", "):
                            vals_all[part_ind, s] += durations[i] / 3600

                # Add up total durations of selected labels (avoid double counting)
                for i, curr_label in enumerate(curr_labels["labels"].tolist()):
                    in_lab_grp = False
                    for sub_lab in curr_label.split(", "):
                        if sub_lab in common_acts:
                            in_lab_grp = True
                    vals_all[part_ind, -1] += durations[i] / 3600 if in_lab_grp else 0
            del nwb, io

    # Make final table/dataframe
    # Strip the literal "Blocklist (...)" wrapper; lstrip/rstrip would strip
    # character sets rather than the exact prefix/suffix.
    common_acts_col = [
        val.removeprefix("Blocklist (").removesuffix(")") for val in common_acts
    ]
    df_all = pd.DataFrame(
        vals_all.round(1),
        index=["P" + str(val + 1).zfill(2) for val in range(n_parts)],
        columns=common_acts_col + ["Total"],
    )
    return df_all
Example #3
def main(
    ctx: click.Context,
    asset_filter: Optional[re.Pattern[str]],
    dandi_instance: str,
    force: Optional[str],
    jobs: int,
    log_level: int,
    pdb: bool,
    quiet_debug: bool,
    target: Path,
    s3bucket: str,
) -> None:
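    """
    Configure the Click context with a DandiDatasetter built from the chosen
    DANDI instance and command-line options, optionally install a pdb
    excepthook, and set up logging before subcommands run.
    """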
    ctx.obj = DandiDatasetter(
        dandi_client=ctx.with_resource(
            DandiAPIClient.for_dandi_instance(dandi_instance)),
        target_path=target,
        config=Config(
            asset_filter=asset_filter,
            jobs=jobs,
            force=force,
            s3bucket=s3bucket,
        ),
    )
    if pdb:
        sys.excepthook = pdb_excepthook
    if quiet_debug:
        log.setLevel(logging.DEBUG)
        log_level = logging.INFO
    logging.basicConfig(
        format="%(asctime)s [%(levelname)-8s] %(name)s %(message)s",
        datefmt="%Y-%m-%dT%H:%M:%S%z",
        level=log_level,
        force=True,  # Override dandi's settings
    )
    ctx.obj.debug_logfile()
Example #4
def main():
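    """
    Compare the asset listing of Dandiset 000026 between the legacy Girder
    server and the DANDI API, dropping into pdb whenever the two listings
    (or the ad-hoc Girder listing) disagree.
    """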
    g_client = GirderCli("http://3.19.164.171")
    a_client = DandiAPIClient("https://api.dandiarchive.org/api")

    with a_client.session():
        g_client.dandi_authenticate()
        # gather all dandisets known to girder: hardcoded _id for "drafts" collection
        g_dandisets = list(
            g_client.listFolder("5e59bb0af19e820ab6ea6c62", "collection"))
        for dandiset, girder_id in [(x["name"], x["_id"])
                                    for x in g_dandisets]:
            if dandiset != "000026":
                continue
            print(f"DANDI:{dandiset}", end="\t")
            g_meta, g_assets_ = g_client.get_dandiset_and_assets(
                girder_id, "folder")
            g_assets = list(g_assets_)
            # harmonize and get only what we care about ATM - path and size,
            # or otherwise we would need to query each asset for metadata
            g_assets_h = set(
                (a["path"].lstrip("/"), a["size"]) for a in g_assets)

            # Yarik trusts nobody.  Two identical bugs are less likely!
            g_assets_adhoc = set(adhoc_list_girder(girder_id, g_client))

            if g_assets_h != g_assets_adhoc:
                print("ad-hoc and dandi listing of girder differs!")
                import pdb

                pdb.set_trace()

            a_meta, a_assets_ = a_client.get_dandiset_and_assets(
                dandiset, "draft")
            a_assets = list(a_assets_)
            a_assets_h = set(
                (a["path"].lstrip("/"), a["size"]) for a in a_assets)

            if a_assets_h != g_assets_h:
                print("differs")
                import pdb

                pdb.set_trace()
            else:
                print(f"{len(a_assets)} assets the same")
Example #5
import natsort

from dandi.dandiapi import DandiAPIClient

from .spec_utils import project_power, proj_mat_compute

# Set parameters
sp = ''  # save path
win_spec_len = 30  # sec
large_win = 30 * 60  # sec
fs = 500  # Hz
freq_range = [3, 125]  # Hz
hgrid_fid = "headGrid.mat"
aal_fid = "aal_rois.mat"
n_parts = 12  # number of participants

# Determine all file paths
with DandiAPIClient() as client:
    paths = []
    for file in client.get_dandiset("000055",
                                    "draft").get_assets_under_path(""):
        paths.append(file.path)
paths = natsort.natsorted(paths)

# Create ROI projection matrices
elec_dens_thresh = 3  # threshold for dipole density
proj_mats = []
for s in range(n_parts):
    fid = [val for val in paths if "sub-" + str(s + 1).zfill(2) in val][0]
    with DandiAPIClient() as client:
        asset = client.get_dandiset("000055", "draft").get_asset_by_path(fid)
        s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)
Example #6
def dandi_client() -> Iterator[DandiAPIClient]:
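    """Yield a DandiAPIClient for the dandi-staging instance, authenticated
    via the DANDI_API_KEY environment variable."""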
    api_token = os.environ["DANDI_API_KEY"]
    with DandiAPIClient.for_dandi_instance("dandi-staging",
                                           token=api_token) as client:
        yield client
Example #7
def _get_wrist_trajs(
    base_start=-1.5, base_end=-1, before=3, after=3, fs_video=30, n_parts=12
):
    """Load in wrist trajectories around move onset events."""
    with DandiAPIClient() as client:
        paths = []
        for file in client.get_dandiset("000055", "draft").get_assets_with_path_prefix(""):
            paths.append(file.path)
    paths = natsort.natsorted(paths)

    displ_lst, part_lst, time_lst, pose_lst = [], [], [], []
    for pat in range(n_parts):
        fids = [val for val in paths if "sub-" + str(pat + 1).zfill(2) in val]
        for i, fid in enumerate(fids):
            with DandiAPIClient() as client:
                asset = client.get_dandiset("000055", "draft").get_asset_by_path(fid)
                s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)
            with NWBHDF5IO(s3_path, mode="r", driver="ros3") as io:
                nwb_file = io.read()

                # Segment data
                events = nwb_file.processing["behavior"].data_interfaces["ReachEvents"]
                times = events.timestamps[:]
                starts = times - before
                stops = times + after

                # Get event hand label
                contra_arm = events.description
                contra_arm = map(lambda x: x.capitalize(), contra_arm.split("_"))
                contra_arm = list(contra_arm)
                contra_arm = "_".join(contra_arm)
                ipsi_arm = (
                    "R" + contra_arm[1:]
                    if contra_arm[0] == "L"
                    else "L" + contra_arm[1:]
                )

                reach_lab = ["contra", "ipsi"]
                for k, reach_arm in enumerate([contra_arm, ipsi_arm]):
                    spatial_series = nwb_file.processing["behavior"].data_interfaces[
                        "Position"
                    ][reach_arm]
                    ep_dat = align_by_times(spatial_series, starts, stops)
                    ep_dat_mag = np.sqrt(
                        np.square(ep_dat[..., 0]) + np.square(ep_dat[..., 1])
                    )

                    # Interpolate and median filter
                    for j in range(ep_dat_mag.shape[0]):
                        df_mag = pd.DataFrame(ep_dat_mag[j, :])
                        df_mag = df_mag.interpolate(method="pad")
                        tmp_val = (
                            df_mag.values.copy().flatten()
                        )  # medfilt(df_mag.values, kernel_size=31)
                        df_mag = pd.DataFrame(tmp_val[::-1])
                        df_mag = df_mag.interpolate(method="pad")
                        ep_dat_mag[j, :] = medfilt(
                            df_mag.values.copy().flatten()[::-1], kernel_size=31
                        )

                    zero_ind = timeseries_time_to_ind(spatial_series, before)
                    base_start_ind = timeseries_time_to_ind(
                        spatial_series, base_start + before
                    )
                    base_end_ind = timeseries_time_to_ind(
                        spatial_series, base_end + before
                    )
                    n_tpoints = ep_dat_mag.shape[1]
                    t_vals = np.arange(n_tpoints) / fs_video - before

                    # Subtract baseline from position data
                    for j in range(ep_dat_mag.shape[0]):
                        curr_magnitude = ep_dat_mag[j, :]
                        curr_magnitude = np.abs(
                            curr_magnitude
                            - np.mean(curr_magnitude[base_start_ind:base_end_ind])
                        )
                        curr_magnitude[np.isnan(curr_magnitude)] = 0
                        displ_lst.extend(curr_magnitude.tolist())
                        part_lst.extend(["P" + str(pat + 1).zfill(2)] * n_tpoints)
                        time_lst.extend(t_vals.tolist())
                        pose_lst.extend([reach_lab[k]] * n_tpoints)

            del nwb_file, io

    df_pose = pd.DataFrame(
        {"Displ": displ_lst, "Sbj": part_lst, "Time": time_lst, "Contra": pose_lst}
    )
    return df_pose, part_lst
Example #8
def plot_ecog_descript(
    n_elecs_tot,
    n_elecs_good,
    part_ids,
    nparts=12,
    allLH=False,
    nrows=3,
    chan_labels="all",
    width=7,
    height=3,
):
    """Plot ECoG electrode positions and identified noisy
    electrodes side by side."""
    with DandiAPIClient() as client:
        paths = []
        for file in client.get_dandiset("000055", "draft").get_assets_with_path_prefix(""):
            paths.append(file.path)
    paths = natsort.natsorted(paths)

    fig = plt.figure(figsize=(width * 3, height * 3), dpi=150)
    # First subplot: electrode locations
    ncols = nparts // nrows
    gs = gridspec.GridSpec(
        nrows=nrows,
        ncols=ncols,  # +2,
        figure=fig,
        width_ratios=[width / ncols]
        * ncols,  # [width/ncols/2]*ncols+[width/10, 4*width/10],
        height_ratios=[height / nrows] * nrows,
        wspace=0,
        hspace=-0.5,
    )
    ax = [None] * (nparts)  # +1)

    for part_ind in tqdm(range(nparts)):
        # Load NWB data file
        fids = [val for val in paths if "sub-" + str(part_ind + 1).zfill(2) in val]
        with DandiAPIClient() as client:
            asset = client.get_dandiset("000055", "draft").get_asset_by_path(fids[0])
            s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)
        with NWBHDF5IO(s3_path, mode="r", driver="ros3") as io:
            nwb = io.read()

            # Determine hemisphere to display
            if allLH:
                sides_2_display = "l"
            else:
                average_xpos_sign = np.nanmean(nwb.electrodes["x"][:])
                sides_2_display = "r" if average_xpos_sign > 0 else "l"

            # Run electrode plotting function
            ax[part_ind] = fig.add_subplot(gs[part_ind // ncols, part_ind % ncols])
            plot_ecog_electrodes_mni_from_nwb_file(
                nwb,
                chan_labels,
                num_grid_chans=64,
                node_size=50,
                colors="silver",
                alpha=0.9,
                sides_2_display=sides_2_display,
                node_edge_colors="k",
                edge_linewidths=1.5,
                ax_in=ax[part_ind],
                allLH=allLH,
            )

        del nwb, io
    #         ax[part_ind].text(-0.2,0.1,'P'+str(part_ind+1).zfill(2), fontsize=8)
    #     fig.text(0.1, 0.91, '(a) ECoG electrode positions', fontsize=10)

    # Second subplot: noisy electrodes per participant
    #     ax[-1] = fig.add_subplot(gs[:, -1])
    #     ax[-1].bar(part_ids,n_elecs_tot,color='lightgrey')
    #     ax[-1].bar(part_ids,n_elecs_good,color='dimgrey')
    #     ax[-1].spines['right'].set_visible(False)
    #     ax[-1].spines['top'].set_visible(False)
    #     ax[-1].set_xticklabels(part_ids, rotation=45)
    #     ax[-1].legend(['Total','Good'], frameon=False, fontsize=8)
    #     ax[-1].tick_params(labelsize=9)
    #     ax[-1].set_ylabel('Number of electrodes', fontsize=9, labelpad=0)
    #     ax[-1].set_title('(b) Total/good electrodes per participant',
    #                     fontsize=10)
    plt.show()
    return fig
Example #9
def load_data_characteristics(nparts=12):
    """Load data characteristics including the number of
    good and total ECoG electrodes, hemisphere implanted,
    and number of recording days for each participant."""
    with DandiAPIClient() as client:
        paths = []
        for file in client.get_dandiset("000055", "draft").get_assets_with_path_prefix(""):
            paths.append(file.path)
    paths = natsort.natsorted(paths)

    n_elecs_tot, n_elecs_good = [], []
    rec_days, hemis, n_elecs_surf_tot, n_elecs_depth_tot = [], [], [], []
    n_elecs_surf_good, n_elecs_depth_good = [], []
    for part_ind in tqdm(range(nparts)):
        fids = [val for val in paths if "sub-" + str(part_ind + 1).zfill(2) in val]
        rec_days.append(len(fids))
        for fid in fids[:1]:
            with DandiAPIClient() as client:
                asset = client.get_dandiset("000055", "draft").get_asset_by_path(fid)
                s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)
            with NWBHDF5IO(s3_path, mode="r", driver="ros3") as io:
                nwb = io.read()

                # Determine good/total electrodes
                n_elecs_good.append(np.sum(nwb.electrodes["good"][:]))
                n_elecs_tot.append(len(nwb.electrodes["good"][:]))

                # Determine implanted hemisphere
                c_wrist = (
                    nwb.processing["behavior"].data_interfaces["ReachEvents"].description[0]
                )
                hemis.append("L" if c_wrist == "r" else "R")

                # Determine surface vs. depth electrode count
                is_surf = identify_elecs(nwb.electrodes["group_name"][:])
                n_elecs_surf_tot.append(np.sum(is_surf))
                n_elecs_depth_tot.append(np.sum(1 - is_surf))
                n_elecs_surf_good.append(
                    np.sum(nwb.electrodes["good"][is_surf.nonzero()[0]])
                )
                n_elecs_depth_good.append(
                    np.sum(nwb.electrodes["good"][(1 - is_surf).nonzero()[0]])
                )

            del nwb, io

    part_nums = [val + 1 for val in range(nparts)]
    part_ids = ["P" + str(val).zfill(2) for val in part_nums]

    return [
        rec_days,
        hemis,
        n_elecs_surf_tot,
        n_elecs_surf_good,
        n_elecs_depth_tot,
        n_elecs_depth_good,
        part_nums,
        part_ids,
        n_elecs_good,
        n_elecs_tot,
    ]
Example #10
    def __init__(self, datasets_path: Path):
        self.datasets_path = datasets_path
        self.dandi_client = DandiAPIClient("https://api.dandiarchive.org/api")
        self.s3client = boto3.client("s3",
                                     config=Config(signature_version=UNSIGNED))
Example #11
class URLUpdater:
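    """
    Update git-annex URLs for DataLad datasets that mirror Dandisets:
    register the versioned S3 URL and the API download URL for each draft
    asset, drop stale girder-assetstore URLs, and push the result to the
    "github" sibling.
    """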
    def __init__(self, datasets_path: Path):
        self.datasets_path = datasets_path
        self.dandi_client = DandiAPIClient("https://api.dandiarchive.org/api")
        self.s3client = boto3.client("s3",
                                     config=Config(signature_version=UNSIGNED))

    def run(self, dandisets=()):
        with self.dandi_client.session():
            for did in dandisets or self.get_dandiset_ids():
                dsdir = self.datasets_path / did
                log.info("Updating URLs for Dandiset %s", did)
                ds = Dataset(str(dsdir))
                self.update_dandiset_urls(did, ds)
                log.info("Pushing to sibling")
                ds.push(to="github")

    def update_dandiset_urls(self, dandiset_id, ds):
        if ds.repo.dirty:
            raise RuntimeError(
                "Dirty repository; clean or save before running")
        ds.repo.always_commit = False
        for a in self.dandi_client.get_dandiset_assets(dandiset_id,
                                                       "draft",
                                                       include_metadata=False):
            path = a["path"]
            log.info("Processing asset %s", path)
            if ds.repo.is_under_annex(path, batch=True):
                file_urls = set(ds.repo.get_urls(path, batch=True))
                bucket_url = self.get_file_bucket_url(dandiset_id, "draft",
                                                      a["asset_id"])
                download_url = (
                    f"https://api.dandiarchive.org/api/dandisets/{dandiset_id}"
                    f"/versions/draft/assets/{a['asset_id']}/download/")
                for url in [bucket_url, download_url]:
                    if url not in file_urls:
                        log.info("Adding URL %s to asset", url)
                        ds.repo.add_url_to_file(path, url, batch=True)
                for url in file_urls:
                    if "dandiarchive.s3.amazonaws.com/girder-assetstore/" in url:
                        log.info("Removing URL %s from asset", url)
                        ds.repo.rm_url(path, url)

            else:
                log.info("File is not managed by git annex; not updating URLs")
        log.info("Commiting changes")
        ds.save(message="Ran use-new-urls.py")

    def get_dandiset_ids(self):
        r = self.dandi_client.get("/dandisets/")
        while True:
            for d in r["results"]:
                yield d["identifier"]
            if r.get("next"):
                r = self.dandi_client.get(r.get("next"))
            else:
                break

    def get_file_bucket_url(self, dandiset_id, version_id, asset_id):
        r = self.dandi_client.send_request(
            "HEAD",
            f"/dandisets/{dandiset_id}/versions/{version_id}/assets/{asset_id}"
            "/download/",
            json_resp=False,
        )
        urlbits = urlparse(r.headers["Location"])
        s3meta = self.s3client.get_object(Bucket="dandiarchive",
                                          Key=urlbits.path.lstrip("/"))
        return urlunparse(
            urlbits._replace(query=f"versionId={s3meta['VersionId']}"))
Example #12
def main(api_url, token, dandiset_path, delete_extant, only_metadata):
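    """
    For each local Dandiset directory, either update the draft's metadata on
    the server or create the Dandiset there, optionally deleting an existing
    draft first.
    """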
    client = DandiAPIClient(api_url=api_url, token=token)
    with client.session():
        for dpath in dandiset_path:
            dandiset = APIDandiset(dpath)
            if delete_extant:
                try:
                    client.get_dandiset(dandiset.identifier, "draft")
                except requests.HTTPError as e:
                    if e.response.status_code != 404:
                        raise
                else:
                    print("Dandiset", dandiset.identifier, "already exists; deleting")
                    client.delete(f"/dandisets/{dandiset.identifier}/")
            if only_metadata:
                print("Setting metadata for Dandiset", dandiset.identifier)
                client.set_dandiset_metadata(
                    dandiset.identifier, metadata=dandiset.metadata
                )
            else:
                print("Creating Dandiset", dandiset.identifier)
                client.create_dandiset(
                    name=dandiset.metadata.get("name", ""), metadata=dandiset.metadata
                )
Example #13
import json

from dandi.dandiapi import DandiAPIClient

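# Print metadata for assets in the latest published version of each Dandiset
# whose measurement technique includes "two-photon".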
with DandiAPIClient.for_dandi_instance("dandi") as client:
    for dandiset in client.get_dandisets():
        if dandiset.most_recent_published_version is None:
            continue
        latest_dandiset = dandiset.for_version(
            dandiset.most_recent_published_version)
        for asset in latest_dandiset.get_assets():
            metadata = asset.get_metadata()
            if any(mtt is not None and "two-photon" in mtt.name
                   for mtt in (metadata.measurementTechnique or [])):
                print(json.dumps(metadata.json_dict(), indent=4))
                # Can be used to also download the asset:
                # asset.download(pathlib.Path(dandiset.identifier, asset.path))