def _load(self, eid, dataset_types=None, dclass_output=False, dry_run=False, cache_dir=None,
          download_only=False, clobber=False, offline=False, keep_uuid=False):
    """
    From a Session ID and dataset types, queries the Alyx database, downloads the data
    from Globus, and loads it into numpy arrays. Single session only.
    """
    # if the input is a UUID, prepend the sessions endpoint to build the URL
    cache_dir = self._get_cache_dir(cache_dir)
    if is_uuid_string(eid):
        eid = '/sessions/' + eid
    eid_str = eid[-36:]
    # get the session information as a dictionary from the Alyx API
    try:
        ses = self.alyx.get('/sessions/' + eid_str)
    except requests.HTTPError:
        raise requests.HTTPError('Session ' + eid_str + ' does not exist')
    # if no dataset_type is provided:
    # a) force the output to be a dictionary that provides context to the data
    # b) download all types that have a data url specified within the alf folder
    dataset_types = [dataset_types] if isinstance(dataset_types, str) else dataset_types
    if not dataset_types or dataset_types == ['__all__']:
        dclass_output = True
    dc = SessionDataInfo.from_session_details(ses, dataset_types=dataset_types, eid=eid_str)
    # loop over each dataset and download if necessary
    for ind in range(len(dc)):
        if dc.url[ind] and not dry_run:
            relpath = PurePath(dc.url[ind].replace(self._par.HTTP_DATA_SERVER, '.')).parents[0]
            cache_dir_file = PurePath(cache_dir, relpath)
            Path(cache_dir_file).mkdir(parents=True, exist_ok=True)
            dc.local_path[ind] = self._download_file(
                dc.url[ind], str(cache_dir_file), clobber=clobber, offline=offline,
                keep_uuid=keep_uuid, file_size=dc.file_size[ind], hash=dc.hash[ind])
    # load the file contents into variables if requested
    if not download_only:
        for ind, fil in enumerate(dc.local_path):
            dc.data[ind] = load_file_content(fil)
    # parse output arguments
    if dclass_output:
        return dc
    # if required, parse the output as a list that matches the dataset_types requested
    list_out = []
    for dt in dataset_types:
        if dt not in dc.dataset_type:
            _logger.warning('dataset ' + dt + ' not found for session: ' + eid_str)
            list_out.append(None)
            continue
        for i, x in enumerate(dc.dataset_type):
            if dt == x:
                if dc.data[i] is not None:
                    list_out.append(dc.data[i])
                else:
                    list_out.append(dc.local_path[i])
    return list_out
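# A minimal, standalone sketch (not part of the class above) of how the local cache path is
# derived from a dataset URL in the download loop: the data-server prefix is replaced by '.'
# and the parent directory of the remaining relative path is joined to the cache directory.
# The server root, URL and cache directory below are hypothetical, for illustration only.
from pathlib import Path, PurePosixPath

HTTP_DATA_SERVER = 'https://example.org/data'  # assumed server root (illustrative)
url = HTTP_DATA_SERVER + '/lab/Subjects/subject/2020-01-01/001/alf/spikes.times.npy'
relpath = PurePosixPath(url.replace(HTTP_DATA_SERVER, '.')).parents[0]
cache_dir_file = Path('/tmp/one_cache') / relpath        # hypothetical cache_dir
cache_dir_file.mkdir(parents=True, exist_ok=True)        # mirrors Path(cache_dir_file).mkdir(...)
print(cache_dir_file)  # /tmp/one_cache/lab/Subjects/subject/2020-01-01/001/alf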
def load_dataset(self,
                 eid: Union[str, Path, UUID],
                 dataset: str,
                 collection: Optional[str] = None,
                 download_only: bool = False) -> Any:
    """
    Load a single dataset from a Session ID and a dataset type.

    :param eid: Experiment session identifier; may be a UUID, URL, experiment reference string,
     details dict or Path
    :param dataset: The ALF dataset to load.  Supports asterisks as wildcards.
    :param collection: The collection to which the object belongs, e.g. 'alf/probe01'.
     For IBL this is the relative path of the file from the session root.
     Supports asterisks as wildcards.
    :param download_only: When true the data are downloaded and the file path is returned
    :return: dataset or a Path object if download_only is true

    Examples:
        intervals = one.load_dataset(eid, '_ibl_trials.intervals.npy')
        intervals = one.load_dataset(eid, '*trials.intervals*')
        filepath = one.load_dataset(eid, '_ibl_trials.intervals.npy', download_only=True)
        spikes = one.load_dataset(eid, 'spikes.times.npy', collection='alf/probe01')
    """
    search_str = 'name__regex,' + dataset.replace('.', r'\.').replace('*', '.*')
    if collection:
        search_str += ',collection__regex,' + collection.replace('*', '.*')
    results = self.alyx.rest('datasets', 'list', session=eid, django=search_str, exists=True)

    # Get filenames of returned ALF files
    collection_set = {x['collection'] for x in results}
    if len(collection_set) > 1:
        raise ALFMultipleCollectionsFound(
            'Matching dataset belongs to multiple collections: ' + ', '.join(collection_set))
    if len(results) > 1:
        raise ALFMultipleObjectsFound(
            'The following matching datasets were found: ' + ', '.join(x['name'] for x in results))
    if len(results) == 0:
        raise ALFObjectNotFound(f'Dataset "{dataset}" not found on Alyx')

    filename = self.download_dataset(results[0])
    assert filename is not None, 'failed to download dataset'

    return filename if download_only else alfio.load_file_content(filename)
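# A small standalone sketch of how the wildcard pattern above becomes a Django REST filter
# string: dots are escaped and asterisks become '.*', so e.g. '*trials.intervals*' matches
# '_ibl_trials.intervals.npy'. Plain Python, no Alyx calls; build_search_str is a hypothetical
# helper that just replays the string manipulation from load_dataset.
import re

def build_search_str(dataset, collection=None):
    search_str = 'name__regex,' + dataset.replace('.', r'\.').replace('*', '.*')
    if collection:
        search_str += ',collection__regex,' + collection.replace('*', '.*')
    return search_str

query = build_search_str('*trials.intervals*', collection='alf')
print(query)  # name__regex,.*trials\.intervals.*,collection__regex,alf
# sanity-check the generated regex locally against an example filename
assert re.search(r'.*trials\.intervals.*', '_ibl_trials.intervals.npy')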
def _load(self, eid, dataset_types=None, dclass_output=False, download_only=False,
          offline=False, **kwargs):
    """
    From a Session ID and dataset types, queries the Alyx database, downloads the data
    from Globus, and loads it into numpy arrays. Single session only.
    """
    if alfio.is_uuid_string(eid):
        eid = '/sessions/' + eid
    eid_str = eid[-36:]
    # if no dataset_type is provided:
    # a) force the output to be a dictionary that provides context to the data
    # b) download all types that have a data url specified within the alf folder
    dataset_types = [dataset_types] if isinstance(dataset_types, str) else dataset_types
    if not dataset_types or dataset_types == ['__all__']:
        dclass_output = True
    if offline:
        dc = self._make_dataclass_offline(eid_str, dataset_types, **kwargs)
    else:
        dc = self._make_dataclass(eid_str, dataset_types, **kwargs)
    # load the file contents into variables if requested
    if not download_only:
        for ind, fil in enumerate(dc.local_path):
            dc.data[ind] = alfio.load_file_content(fil)
    # parse output arguments
    if dclass_output:
        return dc
    # if required, parse the output as a list that matches the dataset_types requested
    list_out = []
    for dt in dataset_types:
        if dt not in dc.dataset_type:
            _logger.warning('dataset ' + dt + ' not found for session: ' + eid_str)
            list_out.append(None)
            continue
        for i, x in enumerate(dc.dataset_type):
            if dt == x:
                if dc.data[i] is not None:
                    list_out.append(dc.data[i])
                else:
                    list_out.append(dc.local_path[i])
    return list_out
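# A toy, self-contained illustration (hypothetical data, no Alyx access) of the list-output
# logic at the end of _load: the returned list follows the order of the requested
# dataset_types, with None for types not found and the local path substituted whenever the
# file content was not loaded into memory.
dataset_type = ['trials.intervals', 'spikes.times']
data = [[0.0, 1.0], None]
local_path = ['/cache/trials.intervals.npy', '/cache/spikes.times.npy']

requested = ['spikes.times', 'clusters.depths']
list_out = []
for dt in requested:
    if dt not in dataset_type:
        list_out.append(None)          # 'clusters.depths' was not found for this session
        continue
    for i, x in enumerate(dataset_type):
        if dt == x:
            list_out.append(data[i] if data[i] is not None else local_path[i])
print(list_out)  # ['/cache/spikes.times.npy', None]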
def load_data(self, download=False):
    """
    Load wheel, trial and camera timestamp data
    :return: None; the loaded data are stored in self.data (wheel, trials, camera_times)
    """
    if download:
        self.data.wheel = self.one.load_object(self.eid, 'wheel')
        self.data.trials = self.one.load_object(self.eid, 'trials')
        cam = self.one.load(self.eid, ['camera.times'], dclass_output=True)
        self.data.camera_times = {vidio.label_from_path(url): ts
                                  for ts, url in zip(cam.data, cam.url)}
    else:
        alf_path = self.session_path / 'alf'
        self.data.wheel = alfio.load_object(alf_path, 'wheel')
        self.data.trials = alfio.load_object(alf_path, 'trials')
        self.data.camera_times = {vidio.label_from_path(x): alfio.load_file_content(x)
                                  for x in alf_path.glob('*Camera.times*')}
    assert all(x is not None for x in self.data.values())
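# A hedged sketch of the camera_times mapping built in the offline branch above: each
# '*Camera.times*' file in the alf folder is keyed by its camera label. label_from_path is
# approximated here by a simple regex; the real helper lives in the video I/O module and may
# behave differently. The session path is hypothetical and the files are kept as paths rather
# than loaded contents.
import re
from pathlib import Path

def label_from_path_sketch(path):
    # e.g. '_ibl_leftCamera.times.npy' -> 'left' (assumption, for illustration)
    match = re.search(r'(left|right|body)Camera', Path(path).name)
    return match.group(1) if match else None

alf_path = Path('/data/subject/2020-01-01/001/alf')  # hypothetical session alf folder
camera_times = {label_from_path_sketch(x): x for x in alf_path.glob('*Camera.times*')}
print(camera_times)  # e.g. {'left': PosixPath('.../_ibl_leftCamera.times.npy'), ...}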
def _load(self, eid, dataset_types=None, dclass_output=False, dry_run=False, cache_dir=None,
          download_only=False, clobber=False, offline=False, keep_uuid=False):
    """
    From a Session ID and dataset types, queries the Alyx database, downloads the data
    from Globus, and loads it into numpy arrays. Single session only.
    """
    # if the input is a UUID, prepend the sessions endpoint to build the URL
    cache_dir = self._get_cache_dir(cache_dir)
    if is_uuid_string(eid):
        eid = '/sessions/' + eid
    eid_str = eid[-36:]
    # get the session information as a dictionary from the Alyx API
    try:
        ses = self.alyx.get('/sessions/' + eid_str)
    except requests.HTTPError:
        raise requests.HTTPError('Session ' + eid_str + ' does not exist')
    # if no dataset_type is provided:
    # a) force the output to be a dictionary that provides context to the data
    # b) download all types that have a data url specified within the alf folder
    dataset_types = [dataset_types] if isinstance(dataset_types, str) else dataset_types
    if not dataset_types or dataset_types == ['__all__']:
        dclass_output = True
    # this performs the filtering
    dc = SessionDataInfo.from_session_details(ses, dataset_types=dataset_types, eid=eid_str)
    # loop over each dataset and download if necessary, fanning out over a thread pool
    with concurrent.futures.ThreadPoolExecutor(max_workers=NTHREADS) as executor:
        futures = []
        for ind in range(len(dc)):
            if dc.url[ind] is None or dry_run:
                futures.append(None)
            else:
                futures.append(executor.submit(
                    self.download_dataset, dc.url[ind], cache_dir=cache_dir, clobber=clobber,
                    offline=offline, keep_uuid=keep_uuid, file_size=dc.file_size[ind],
                    hash=dc.hash[ind]))
        concurrent.futures.wait(list(filter(lambda x: x is not None, futures)))
        for ind, future in enumerate(futures):
            if future is None:
                continue
            dc.local_path[ind] = future.result()
    # load the file contents into variables if requested
    if not download_only:
        for ind, fil in enumerate(dc.local_path):
            dc.data[ind] = load_file_content(fil)
    # parse output arguments
    if dclass_output:
        return dc
    # if required, parse the output as a list that matches the dataset_types requested
    list_out = []
    for dt in dataset_types:
        if dt not in dc.dataset_type:
            _logger.warning('dataset ' + dt + ' not found for session: ' + eid_str)
            list_out.append(None)
            continue
        for i, x in enumerate(dc.dataset_type):
            if dt == x:
                if dc.data[i] is not None:
                    list_out.append(dc.data[i])
                else:
                    list_out.append(dc.local_path[i])
    return list_out
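# A generic, self-contained sketch of the thread-pool pattern used above: downloads are
# submitted in parallel while None placeholders keep the results aligned with entries that
# are skipped (no URL, or dry run). download_one and NTHREADS are hypothetical stand-ins for
# the real download method and worker count.
import concurrent.futures

NTHREADS = 4

def download_one(url):
    return '/cache/' + url.rsplit('/', 1)[-1]   # pretend download, returns a local path

urls = ['https://example.org/a.npy', None, 'https://example.org/b.npy']
with concurrent.futures.ThreadPoolExecutor(max_workers=NTHREADS) as executor:
    futures = [None if u is None else executor.submit(download_one, u) for u in urls]
    concurrent.futures.wait([f for f in futures if f is not None])
local_paths = [None if f is None else f.result() for f in futures]
print(local_paths)  # ['/cache/a.npy', None, '/cache/b.npy']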