def text_dandiset(
    dandi_client: DandiAPIClient, tmp_path_factory: pytest.TempPathFactory
) -> Iterator[Dict[str, Any]]:
    """Create, populate and upload a small text-only Dandiset, then clean it up.

    A Dandiset is created through ``dandi_client``, a temporary directory is
    filled with a handful of text files (plus the Dandiset metadata file) and
    uploaded to the "dandi-staging" instance.  A dict is then yielded with the
    keys ``client``, ``dspath``, ``dandiset``, ``dandiset_id`` and
    ``reupload`` (a callable that uploads again, optionally restricted to
    given paths).

    When the consumer is done (or the upload fails), every non-draft version
    of the Dandiset is deleted, followed by the Dandiset itself.
    """
    title = "Dandiset for testing backups2datalad"
    author = {
        "schemaKey": "Person",
        "name": "Wodder, John",
        "roleName": ["dcite:Author", "dcite:ContactPerson"],
    }
    metadata = {
        "schemaKey": "Dandiset",
        "name": title,
        "description": "A test text Dandiset",
        "contributor": [author],
        "license": ["spdx:CC0-1.0"],
        "manifestLocation": ["https://github.com/dandi/dandi-cli"],
    }
    d = dandi_client.create_dandiset(title, metadata)
    dandiset_id = d.identifier

    # Lay out the local copy of the Dandiset: two subdirectories and six files.
    dspath = tmp_path_factory.mktemp("text_dandiset")
    for folder in ("subdir1", "subdir2"):
        (dspath / folder).mkdir()
    seed_files = [
        (dspath / dandiset_metadata_file, f"identifier: '{dandiset_id}'\n"),
        (dspath / "file.txt", "This is test text.\n"),
        (dspath / "v0.txt", "Version 0\n"),
        (dspath / "subdir1" / "apple.txt", "Apple\n"),
        (dspath / "subdir2" / "banana.txt", "Banana\n"),
        (dspath / "subdir2" / "coconut.txt", "Coconut\n"),
    ]
    for target, text in seed_files:
        target.write_text(text)

    def upload_dandiset(paths: Optional[List[str]] = None, **kwargs: Any) -> None:
        """Upload ``paths`` (default: the whole directory) to dandi-staging."""
        targets = paths if paths else [dspath]
        upload(
            paths=targets,
            dandi_instance="dandi-staging",
            devel_debug=True,
            allow_any_path=True,
            validation="skip",
            **kwargs,
        )

    try:
        upload_dandiset()
        yield {
            "client": dandi_client,
            "dspath": dspath,
            "dandiset": d,
            "dandiset_id": dandiset_id,
            "reupload": upload_dandiset,
        }
    finally:
        # Non-draft versions are removed individually before the Dandiset
        # itself is deleted.
        for version in d.get_versions():
            if version.identifier == "draft":
                continue
            dandi_client.delete(f"{d.api_path}versions/{version.identifier}/")
        d.delete()
def _strip_blocklist_wrapper(label):
    """Return ``label`` with an enclosing ``"Blocklist (" ... ")"`` removed.

    Labels that are not wrapped that way are returned unchanged.
    """
    prefix, suffix = "Blocklist (", ")"
    if label.startswith(prefix) and label.endswith(suffix):
        return label[len(prefix):-len(suffix)]
    return label


def clabel_table_create(
    common_acts, n_parts=12, data_lp="/data2/users/stepeter/files_nwb/downloads/000055/"
):
    """Create table of coarse label durations across participants.

    Labels to include in the table are specified by common_acts.

    Parameters
    ----------
    common_acts : sequence of str
        Coarse labels to tabulate.  An epoch counts towards a label when the
        label is one of the ", "-separated entries of the epoch's ``labels``
        value.
    n_parts : int
        Number of participants; participant ``k`` is matched to asset paths
        containing ``"sub-"`` followed by the zero-padded number ``k + 1``.
    data_lp : str
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        One row per participant ("P01", "P02", ...), one column per entry of
        ``common_acts`` (with any ``"Blocklist (...)"`` wrapper removed) plus
        a final "Total" column.  Values are summed
        ``stop_time - start_time`` divided by 3600 (hours, assuming the epoch
        times are in seconds -- TODO confirm), rounded to one decimal.
    """
    with DandiAPIClient() as client:
        paths = [
            asset.path
            for asset in client.get_dandiset("000055", "draft").get_assets_with_path_prefix("")
        ]
    paths = natsort.natsorted(paths)

    vals_all = np.zeros([n_parts, len(common_acts) + 1])
    for part_ind in tqdm(range(n_parts)):
        sub_tag = "sub-" + str(part_ind + 1).zfill(2)
        fids = [val for val in paths if sub_tag in val]
        for fid in fids:
            with DandiAPIClient() as client:
                asset = client.get_dandiset("000055", "draft").get_asset_by_path(fid)
                s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)

            with NWBHDF5IO(s3_path, mode="r", driver="ros3") as io:
                nwb = io.read()

                curr_labels = nwb.intervals["epochs"].to_dataframe()
                durations = (
                    curr_labels.loc[:, "stop_time"].values
                    - curr_labels.loc[:, "start_time"].values
                )
                # Split every epoch's label string once instead of once per
                # activity.
                epoch_labels = [
                    label.split(", ") for label in curr_labels["labels"].tolist()
                ]

                for i, sub_labs in enumerate(epoch_labels):
                    hours = durations[i] / 3600

                    # Add up durations of each label
                    for s, curr_act in enumerate(common_acts):
                        if curr_act in sub_labs:
                            vals_all[part_ind, s] += hours

                    # Add up total durations of selected labels; each epoch
                    # is counted at most once to avoid double counting.
                    if any(sub_lab in common_acts for sub_lab in sub_labs):
                        vals_all[part_ind, -1] += hours
            del nwb, io

    # Make final table/dataframe.
    # NOTE: str.lstrip/rstrip strip *character sets*, not a prefix/suffix, so
    # they would also eat leading letters of unrelated labels (e.g. "talk")
    # or a trailing ")" of a label that was never wrapped; strip the exact
    # wrapper instead.
    common_acts_col = [_strip_blocklist_wrapper(val) for val in common_acts]
    df_all = pd.DataFrame(
        vals_all.round(1),
        index=["P" + str(val + 1).zfill(2) for val in range(n_parts)],
        columns=common_acts_col + ["Total"],
    )
    return df_all
def main(
    ctx: click.Context,
    asset_filter: Optional[re.Pattern[str]],
    dandi_instance: str,
    force: Optional[str],
    jobs: int,
    log_level: int,
    pdb: bool,
    quiet_debug: bool,
    target: Path,
    s3bucket: str,
) -> None:
    """Set up the shared ``DandiDatasetter`` and configure logging.

    The datasetter is stored on ``ctx.obj``.  Its API client is registered
    with ``ctx.with_resource`` so the click context manages its lifetime.

    When ``pdb`` is true, ``pdb_excepthook`` is installed as
    ``sys.excepthook``.  When ``quiet_debug`` is true, the module logger
    ``log`` is set to DEBUG while the root configuration uses INFO,
    regardless of the ``log_level`` passed in.
    """
    api_client = ctx.with_resource(DandiAPIClient.for_dandi_instance(dandi_instance))
    settings = Config(
        asset_filter=asset_filter,
        jobs=jobs,
        force=force,
        s3bucket=s3bucket,
    )
    ctx.obj = DandiDatasetter(
        dandi_client=api_client,
        target_path=target,
        config=settings,
    )

    if pdb:
        sys.excepthook = pdb_excepthook

    root_level = log_level
    if quiet_debug:
        log.setLevel(logging.DEBUG)
        root_level = logging.INFO

    logging.basicConfig(
        format="%(asctime)s [%(levelname)-8s] %(name)s %(message)s",
        datefmt="%Y-%m-%dT%H:%M:%S%z",
        level=root_level,
        force=True,  # replace handlers installed earlier (e.g. by dandi)
    )
    ctx.obj.debug_logfile()
def main():
    """Compare the asset listing of a Dandiset in girder with the DANDI API.

    For every folder of the hardcoded girder "drafts" collection (currently
    restricted to Dandiset "000026"), the set of ``(path, size)`` pairs is
    computed three ways: via ``GirderCli.get_dandiset_and_assets``, via
    ``adhoc_list_girder`` and via ``DandiAPIClient.get_dandiset_and_assets``.
    Any mismatch prints a message and drops into ``pdb`` for inspection;
    otherwise the number of matching assets is printed.
    """
    g_client = GirderCli("http://3.19.164.171")
    a_client = DandiAPIClient("https://api.dandiarchive.org/api")

    with a_client.session():
        g_client.dandi_authenticate()
        # gather all dandisets known to girder: hardcoded _id for "drafts" collection
        g_dandisets = list(
            g_client.listFolder("5e59bb0af19e820ab6ea6c62", "collection"))
        for dandiset, girder_id in [(x["name"], x["_id"]) for x in g_dandisets]:
            # Only this one Dandiset is checked; everything else is skipped.
            if dandiset != "000026":
                continue
            # No newline yet: the verdict is printed on the same line below.
            print(f"DANDI:{dandiset}", end="\t")
            g_meta, g_assets_ = g_client.get_dandiset_and_assets(
                girder_id, "folder")
            g_assets = list(g_assets_)
            # harmonize and get only what we care about ATM - path and size,
            # or otherwise we would need to query each asset for metadata
            g_assets_h = set(
                (a["path"].lstrip("/"), a["size"]) for a in g_assets)

            # Yarik trusts nobody.  Two identical bugs are less likely!
            # Cross-check the girder listing against an independent
            # implementation before comparing it with the API.
            g_assets_adhoc = set(adhoc_list_girder(girder_id, g_client))

            if g_assets_h != g_assets_adhoc:
                print("ad-hoc and dandi listing of girder differs!")
                # Interactive inspection; locals above are available in pdb.
                import pdb
                pdb.set_trace()

            a_meta, a_assets_ = a_client.get_dandiset_and_assets(
                dandiset, "draft")
            a_assets = list(a_assets_)
            a_assets_h = set(
                (a["path"].lstrip("/"), a["size"]) for a in a_assets)

            if a_assets_h != g_assets_h:
                print("differs")
                import pdb
                pdb.set_trace()
            else:
                print(f"{len(a_assets)} assets the same")
from dandi.dandiapi import DandiAPIClient
from .spec_utils import project_power, proj_mat_compute

# Set parameters
sp = ''  # save path
win_spec_len = 30  # sec
large_win = 30 * 60  # sec
fs = 500  # Hz
freq_range = [3, 125]  # Hz
hgrid_fid = "headGrid.mat"
aal_fid = "aal_rois.mat"
n_parts = 12  # number of participants

# Determine all file paths: collect the path of every asset in the draft
# version of Dandiset 000055, then sort them in natural order.
with DandiAPIClient() as client:
    paths = []
    for file in client.get_dandiset("000055", "draft").get_assets_under_path(""):
        paths.append(file.path)
paths = natsort.natsorted(paths)

# Create ROI projection matrices
elec_dens_thresh = 3  # threshold for dipole density
proj_mats = []
for s in range(n_parts):
    # First (naturally sorted) asset whose path contains this participant's
    # "sub-XX" tag; raises IndexError if the participant has no asset.
    fid = [val for val in paths if "sub-" + str(s + 1).zfill(2) in val][0]
    # Resolve the asset path to a content URL with the query string stripped.
    with DandiAPIClient() as client:
        asset = client.get_dandiset("000055", "draft").get_asset_by_path(fid)
        s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)
def dandi_client() -> Iterator[DandiAPIClient]:
    """Yield a ``DandiAPIClient`` for the "dandi-staging" instance.

    The API token is read from the ``DANDI_API_KEY`` environment variable.
    The client is used as a context manager, so it is closed once the
    consumer of this generator is finished with it.

    Raises
    ------
    KeyError
        If ``DANDI_API_KEY`` is not set in the environment.
    """
    # This function is a generator, so the return annotation describes the
    # iterator it produces rather than the yielded client itself.
    api_token = os.environ["DANDI_API_KEY"]
    with DandiAPIClient.for_dandi_instance("dandi-staging", token=api_token) as client:
        yield client
def _get_wrist_trajs(
    base_start=-1.5, base_end=-1, before=3, after=3, fs_video=30, n_parts=12
):
    """Load in wrist trajectories around move onset events.

    Parameters
    ----------
    base_start, base_end : float
        Start and end of the baseline window, relative to each event time
        (same units as ``before``/``after``).
    before, after : float
        Amount of time kept before and after each event when segmenting.
    fs_video : float
        Sampling rate used to build the time axis of each segment.
    n_parts : int
        Number of participants; participant ``k`` is matched to asset paths
        containing ``"sub-"`` followed by the zero-padded number ``k + 1``.

    Returns
    -------
    df_pose : pandas.DataFrame
        Long-format table with columns "Displ" (baseline-subtracted absolute
        displacement magnitude), "Sbj" (participant id such as "P01"),
        "Time" (time relative to the event) and "Contra" ("contra" or
        "ipsi").
    part_lst : list of str
        The same per-sample participant ids stored in ``df_pose["Sbj"]``.
    """
    # Collect and naturally sort the paths of all assets in the draft
    # version of Dandiset 000055.
    with DandiAPIClient() as client:
        paths = []
        for file in client.get_dandiset("000055", "draft").get_assets_with_path_prefix(""):
            paths.append(file.path)
    paths = natsort.natsorted(paths)

    displ_lst, part_lst, time_lst, pose_lst = [], [], [], []
    for pat in range(n_parts):
        fids = [val for val in paths if "sub-" + str(pat + 1).zfill(2) in val]
        for i, fid in enumerate(fids):
            # Resolve the asset to a content URL that can be opened remotely.
            with DandiAPIClient() as client:
                asset = client.get_dandiset("000055", "draft").get_asset_by_path(fid)
                s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)

            with NWBHDF5IO(
                s3_path, mode="r", driver="ros3"
            ) as io:
                nwb_file = io.read()

                # Segment data
                events = nwb_file.processing["behavior"].data_interfaces["ReachEvents"]
                times = events.timestamps[:]
                starts = times - before
                stops = times + after

                # Get event hand label: capitalize each "_"-separated word of
                # the event description to obtain the contralateral series
                # name, then swap the leading "L"/"R" for the ipsilateral one.
                contra_arm = events.description
                contra_arm = map(lambda x: x.capitalize(), contra_arm.split("_"))
                contra_arm = list(contra_arm)
                contra_arm = "_".join(contra_arm)
                ipsi_arm = (
                    "R" + contra_arm[1:] if contra_arm[0] == "L" else "L" + contra_arm[1:]
                )

                reach_lab = ["contra", "ipsi"]
                for k, reach_arm in enumerate([contra_arm, ipsi_arm]):
                    spatial_series = nwb_file.processing["behavior"].data_interfaces[
                        "Position"
                    ][reach_arm]
                    ep_dat = align_by_times(spatial_series, starts, stops)
                    # Magnitude from the first two components of the last axis
                    # (presumably x and y -- TODO confirm).
                    ep_dat_mag = np.sqrt(
                        np.square(ep_dat[..., 0]) + np.square(ep_dat[..., 1])
                    )

                    # Interpolate and median filter
                    for j in range(ep_dat_mag.shape[0]):
                        # Pad-fill in one direction ...
                        df_mag = pd.DataFrame(ep_dat_mag[j, :])
                        df_mag = df_mag.interpolate(method="pad")
                        tmp_val = (
                            df_mag.values.copy().flatten()
                        )  # medfilt(df_mag.values, kernel_size=31)
                        # ... then pad-fill the reversed trace so gaps are
                        # also filled from the other direction, and flip the
                        # result back before median filtering.
                        df_mag = pd.DataFrame(tmp_val[::-1])
                        df_mag = df_mag.interpolate(method="pad")
                        ep_dat_mag[j, :] = medfilt(
                            df_mag.values.copy().flatten()[::-1], kernel_size=31
                        )

                    # NOTE(review): zero_ind is computed but not used below.
                    zero_ind = timeseries_time_to_ind(spatial_series, before)
                    # Baseline window expressed as sample indices within a
                    # segment (segments start ``before`` ahead of the event).
                    base_start_ind = timeseries_time_to_ind(
                        spatial_series, base_start + before
                    )
                    base_end_ind = timeseries_time_to_ind(
                        spatial_series, base_end + before
                    )
                    n_tpoints = ep_dat_mag.shape[1]
                    t_vals = np.arange(n_tpoints) / fs_video - before

                    # Subtract baseline from position data
                    for j in range(ep_dat_mag.shape[0]):
                        curr_magnitude = ep_dat_mag[j, :]
                        curr_magnitude = np.abs(
                            curr_magnitude
                            - np.mean(curr_magnitude[base_start_ind:base_end_ind])
                        )
                        # Remaining NaNs are replaced by zero displacement.
                        curr_magnitude[np.isnan(curr_magnitude)] = 0
                        displ_lst.extend(curr_magnitude.tolist())
                        part_lst.extend(["P" + str(pat + 1).zfill(2)] * n_tpoints)
                        time_lst.extend(t_vals.tolist())
                        pose_lst.extend([reach_lab[k]] * n_tpoints)

            # Drop references to the file objects before the next file.
            del nwb_file, io

    df_pose = pd.DataFrame(
        {"Displ": displ_lst, "Sbj": part_lst, "Time": time_lst, "Contra": pose_lst}
    )
    return df_pose, part_lst
def plot_ecog_descript(
    n_elecs_tot,
    n_elecs_good,
    part_ids,
    nparts=12,
    allLH=False,
    nrows=3,
    chan_labels="all",
    width=7,
    height=3,
):
    """Plot the ECoG electrode positions of every participant on one figure.

    One subplot per participant is laid out on an ``nrows`` x
    ``nparts // nrows`` grid.  For each participant the first (naturally
    sorted) asset of Dandiset 000055 matching its "sub-XX" tag is opened and
    passed to ``plot_ecog_electrodes_mni_from_nwb_file``.

    The displayed hemisphere is "l" when ``allLH`` is true; otherwise it is
    "r" if the NaN-ignoring mean of the electrode ``x`` column is positive
    and "l" if not.

    ``n_elecs_tot``, ``n_elecs_good`` and ``part_ids`` are accepted but not
    used: the bar chart of total/good electrodes that consumed them is
    disabled.

    The figure is shown with ``plt.show()`` and then returned.
    """
    with DandiAPIClient() as client:
        dandiset = client.get_dandiset("000055", "draft")
        paths = [asset.path for asset in dandiset.get_assets_with_path_prefix("")]
    paths = natsort.natsorted(paths)

    fig = plt.figure(figsize=(width * 3, height * 3), dpi=150)

    # Grid holding the electrode-location subplots.
    ncols = nparts // nrows
    col_widths = [width / ncols] * ncols
    row_heights = [height / nrows] * nrows
    gs = gridspec.GridSpec(
        nrows=nrows,
        ncols=ncols,
        figure=fig,
        width_ratios=col_widths,
        height_ratios=row_heights,
        wspace=0,
        hspace=-0.5,
    )

    ax = [None] * nparts
    for part_ind in tqdm(range(nparts)):
        # Locate and open the participant's first NWB file.
        subject_tag = "sub-" + str(part_ind + 1).zfill(2)
        fids = [path for path in paths if subject_tag in path]
        with DandiAPIClient() as client:
            asset = client.get_dandiset("000055", "draft").get_asset_by_path(fids[0])
            s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)

        with NWBHDF5IO(s3_path, mode="r", driver="ros3") as io:
            nwb = io.read()

            # Pick the hemisphere to draw.
            sides_2_display = "l"
            if not allLH and np.nanmean(nwb.electrodes["x"][:]) > 0:
                sides_2_display = "r"

            # Draw this participant's electrodes in its grid cell.
            grid_row, grid_col = divmod(part_ind, ncols)
            ax[part_ind] = fig.add_subplot(gs[grid_row, grid_col])
            plot_ecog_electrodes_mni_from_nwb_file(
                nwb,
                chan_labels,
                num_grid_chans=64,
                node_size=50,
                colors="silver",
                alpha=0.9,
                sides_2_display=sides_2_display,
                node_edge_colors="k",
                edge_linewidths=1.5,
                ax_in=ax[part_ind],
                allLH=allLH,
            )
        del nwb, io

    plt.show()
    return fig
def load_data_characteristics(nparts=12):
    """Collect per-participant recording and electrode statistics.

    For each participant the number of matching assets in Dandiset 000055 is
    recorded as the number of recording days, and the first matching asset is
    opened to count total/good electrodes, split them into surface and depth
    electrodes (via ``identify_elecs`` on the electrode group names) and
    derive the implanted hemisphere ("L" when the ``ReachEvents`` description
    starts with "r", otherwise "R").

    Returns a list, in this order: ``rec_days``, ``hemis``,
    ``n_elecs_surf_tot``, ``n_elecs_surf_good``, ``n_elecs_depth_tot``,
    ``n_elecs_depth_good``, ``part_nums``, ``part_ids``, ``n_elecs_good``,
    ``n_elecs_tot``.
    """
    with DandiAPIClient() as client:
        dandiset = client.get_dandiset("000055", "draft")
        paths = [asset.path for asset in dandiset.get_assets_with_path_prefix("")]
    paths = natsort.natsorted(paths)

    rec_days, hemis = [], []
    n_elecs_tot, n_elecs_good = [], []
    n_elecs_surf_tot, n_elecs_surf_good = [], []
    n_elecs_depth_tot, n_elecs_depth_good = [], []

    for part_ind in tqdm(range(nparts)):
        subject_tag = "sub-" + str(part_ind + 1).zfill(2)
        fids = [path for path in paths if subject_tag in path]
        rec_days.append(len(fids))

        # Only the participant's first file is inspected (if there is one).
        if fids:
            fid = fids[0]
            with DandiAPIClient() as client:
                asset = client.get_dandiset("000055", "draft").get_asset_by_path(fid)
                s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)

            with NWBHDF5IO(s3_path, mode="r", driver="ros3") as io:
                nwb = io.read()

                # Good / total electrode counts.
                good_col = nwb.electrodes["good"]
                n_elecs_good.append(np.sum(good_col[:]))
                n_elecs_tot.append(len(good_col[:]))

                # Implanted hemisphere, from the first character of the
                # reach-event description.
                reach_events = nwb.processing["behavior"].data_interfaces["ReachEvents"]
                c_wrist = reach_events.description[0]
                hemis.append("L" if c_wrist == "r" else "R")

                # Surface vs. depth electrode counts.
                is_surf = identify_elecs(nwb.electrodes["group_name"][:])
                is_depth = 1 - is_surf
                n_elecs_surf_tot.append(np.sum(is_surf))
                n_elecs_depth_tot.append(np.sum(is_depth))
                n_elecs_surf_good.append(np.sum(good_col[is_surf.nonzero()[0]]))
                n_elecs_depth_good.append(np.sum(good_col[is_depth.nonzero()[0]]))
            del nwb, io

    part_nums = list(range(1, nparts + 1))
    part_ids = ["P" + str(num).zfill(2) for num in part_nums]

    return [
        rec_days,
        hemis,
        n_elecs_surf_tot,
        n_elecs_surf_good,
        n_elecs_depth_tot,
        n_elecs_depth_good,
        part_nums,
        part_ids,
        n_elecs_good,
        n_elecs_tot,
    ]
def __init__(self, datasets_path: Path):
    """Remember the datasets directory and create the API and S3 clients.

    The DANDI client targets ``https://api.dandiarchive.org/api``; the S3
    client is created with ``signature_version=UNSIGNED``.
    """
    unsigned_cfg = Config(signature_version=UNSIGNED)
    self.datasets_path = datasets_path
    self.dandi_client = DandiAPIClient("https://api.dandiarchive.org/api")
    self.s3client = boto3.client("s3", config=unsigned_cfg)
class URLUpdater:
    """Rewrite the git-annex URLs of Dandiset datasets to the current ones.

    For every annexed asset the versioned S3 bucket URL and the API download
    URL are registered, and URLs pointing at the old girder asset store are
    removed.
    """

    def __init__(self, datasets_path: Path):
        """Remember the datasets directory and create the API and S3 clients."""
        unsigned_cfg = Config(signature_version=UNSIGNED)
        self.datasets_path = datasets_path
        self.dandi_client = DandiAPIClient("https://api.dandiarchive.org/api")
        self.s3client = boto3.client("s3", config=unsigned_cfg)

    def run(self, dandisets=()):
        """Update and push each Dandiset in ``dandisets`` (default: all)."""
        with self.dandi_client.session():
            targets = dandisets or self.get_dandiset_ids()
            for did in targets:
                dsdir = self.datasets_path / did
                log.info("Updating URLs for Dandiset %s", did)
                ds = Dataset(str(dsdir))
                self.update_dandiset_urls(did, ds)
                log.info("Pushing to sibling")
                ds.push(to="github")

    def update_dandiset_urls(self, dandiset_id, ds):
        """Fix the URLs of every draft asset of ``dandiset_id`` in ``ds``.

        Raises ``RuntimeError`` if the repository has uncommitted changes.
        All changes are saved in one commit at the end.
        """
        if ds.repo.dirty:
            raise RuntimeError(
                "Dirty repository; clean or save before running")
        ds.repo.always_commit = False

        assets = self.dandi_client.get_dandiset_assets(
            dandiset_id, "draft", include_metadata=False)
        for a in assets:
            path = a["path"]
            log.info("Processing asset %s", path)

            if not ds.repo.is_under_annex(path, batch=True):
                log.info("File is not managed by git annex; not updating URLs")
                continue

            file_urls = set(ds.repo.get_urls(path, batch=True))
            bucket_url = self.get_file_bucket_url(
                dandiset_id, "draft", a["asset_id"])
            download_url = (
                f"https://api.dandiarchive.org/api/dandisets/{dandiset_id}"
                f"/versions/draft/assets/{a['asset_id']}/download/")

            # Register whichever of the two current URLs is still missing.
            for url in (bucket_url, download_url):
                if url in file_urls:
                    continue
                log.info("Adding URL %s to asset", url)
                ds.repo.add_url_to_file(path, url, batch=True)

            # Drop URLs that point at the old girder asset store.
            for url in file_urls:
                if "dandiarchive.s3.amazonaws.com/girder-assetstore/" not in url:
                    continue
                log.info("Removing URL %s from asset", url)
                ds.repo.rm_url(path, url)

        log.info("Commiting changes")
        ds.save(message="Ran use-new-urls.py")

    def get_dandiset_ids(self):
        """Yield the identifier of every Dandiset, following result pages."""
        page = self.dandi_client.get("/dandisets/")
        while True:
            yield from (entry["identifier"] for entry in page["results"])
            next_url = page.get("next")
            if not next_url:
                return
            page = self.dandi_client.get(next_url)

    def get_file_bucket_url(self, dandiset_id, version_id, asset_id):
        """Return the asset's bucket URL with ``versionId`` as its query.

        The download endpoint is queried with HEAD; the ``Location`` header
        gives the bucket URL, whose object is looked up in the
        "dandiarchive" bucket to obtain its ``VersionId``.
        """
        endpoint = (
            f"/dandisets/{dandiset_id}/versions/{version_id}/assets/{asset_id}"
            "/download/"
        )
        r = self.dandi_client.send_request("HEAD", endpoint, json_resp=False)
        urlbits = urlparse(r.headers["Location"])
        key = urlbits.path.lstrip("/")
        s3meta = self.s3client.get_object(Bucket="dandiarchive", Key=key)
        versioned = urlbits._replace(query=f"versionId={s3meta['VersionId']}")
        return urlunparse(versioned)
def main(api_url, token, dandiset_path, delete_extant, only_metadata):
    """Create Dandisets (or only set their metadata) from local directories.

    For every path in ``dandiset_path`` an ``APIDandiset`` is loaded.  With
    ``delete_extant`` set, a Dandiset that already exists on the server is
    deleted first; a 404 from the lookup means it does not exist, and any
    other HTTP error is re-raised.  Then, depending on ``only_metadata``,
    either the metadata of the Dandiset is set or the Dandiset is created.
    """
    client = DandiAPIClient(api_url=api_url, token=token)
    with client.session():
        for dpath in dandiset_path:
            dandiset = APIDandiset(dpath)

            if delete_extant:
                already_there = True
                try:
                    client.get_dandiset(dandiset.identifier, "draft")
                except requests.HTTPError as exc:
                    if exc.response.status_code != 404:
                        raise
                    already_there = False
                if already_there:
                    print("Dandiset", dandiset.identifier, "already exists; deleting")
                    client.delete(f"/dandisets/{dandiset.identifier}/")

            if not only_metadata:
                print("Creating Dandiset", dandiset.identifier)
                client.create_dandiset(
                    name=dandiset.metadata.get("name", ""),
                    metadata=dandiset.metadata,
                )
            else:
                print("Setting metadata for Dandiset", dandiset.identifier)
                client.set_dandiset_metadata(
                    dandiset.identifier, metadata=dandiset.metadata
                )
import json

from dandi.dandiapi import DandiAPIClient


def _mentions_two_photon(metadata):
    """Tell whether any measurement technique name contains "two-photon"."""
    techniques = metadata.measurementTechnique or []
    return any(
        mtt is not None and "two-photon" in mtt.name for mtt in techniques
    )


# Print the metadata of every asset, in the most recent published version of
# each Dandiset, that lists a two-photon measurement technique.
with DandiAPIClient.for_dandi_instance("dandi") as client:
    for dandiset in client.get_dandisets():
        published = dandiset.most_recent_published_version
        if published is None:
            # Never published: nothing to inspect.
            continue
        latest_dandiset = dandiset.for_version(published)
        for asset in latest_dandiset.get_assets():
            metadata = asset.get_metadata()
            if not _mentions_two_photon(metadata):
                continue
            print(json.dumps(metadata.json_dict(), indent=4))
            # Can be used to also download the asset:
            # asset.download(pathlib.Path(dandiset.identifier, asset.path))