@classmethod
def insert_new_params(
        cls,
        processing_method: str,
        paramset_id: int,
        paramset_desc: str,
        params: dict,
        processing_method_desc: str = "",
):
    ProcessingMethod.insert1(
        {"processing_method": processing_method}, skip_duplicates=True)

    param_dict = {
        "processing_method": processing_method,
        "paramset_id": paramset_id,
        "paramset_desc": paramset_desc,
        "params": params,
        "param_set_hash": dict_to_uuid(params),
    }
    q_param = cls & {"param_set_hash": param_dict["param_set_hash"]}

    if q_param:  # the specified param-set already exists
        pname = q_param.fetch1("paramset_id")
        if pname == paramset_id:  # the existing set has the same id: job done
            return
        else:  # same params under a different id: likely human error
            raise dj.DataJointError(
                "The specified param-set already exists"
                " - paramset_id: {}".format(pname))
    else:
        cls.insert1(param_dict)
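# Usage sketch for insert_new_params above; the table name
# (ProcessingParamSet) and all parameter values are hypothetical:
ProcessingParamSet.insert_new_params(
    processing_method='kilosort2',
    paramset_id=0,
    paramset_desc='default kilosort2 parameters',
    params={'fs': 30000, 'Th': [10, 4]},
)
# Re-running the identical call is a no-op; reusing the same params under a
# different paramset_id raises dj.DataJointError. The same pattern applies to
# the other insert_new_params variants below.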
def local_endpoint(self):
    custom = dj.config.get('custom', {})
    if 'globus.local_endpoint' in custom:
        return (custom['globus.local_endpoint'],
                custom['globus.local_endpoint_subdir'],
                custom['globus.local_endpoint_local_path'])
    else:
        raise dj.DataJointError("globus.local_endpoint not configured")
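# Example of the dj.config entries this property expects (the endpoint UUID
# and paths are illustrative placeholders):
dj.config['custom'] = {
    'globus.local_endpoint': '00000000-0000-0000-0000-000000000000',
    'globus.local_endpoint_subdir': 'publication',
    'globus.local_endpoint_local_path': '/data/publication',
}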
def format_value(self, value):
    """format value"""
    if self.foreign_is_manuallookup:
        if value['existing_entries'] == '<new>':
            value.pop('existing_entries')
            try:
                self.foreign_table.insert1(
                    self.foreign_table_format_value(value))
            except dj.DataJointError as e:
                raise dj.DataJointError(
                    "An error occurred while inserting into parent table"
                    f" {self.foreign_table.full_table_name}: {e}")
            if self.aliased is None:
                return {name: value[name] for name in self.names}
            else:
                return {name: value[self.aliased[name]]
                        for name in self.names}
        elif self.singular:
            return {self.name: value['existing_entries']}
        else:
            values = value['existing_entries'].strip('()').split(', ')
            # strip any quotes
            values = [ele.strip().strip('"').strip("'") for ele in values]
            return dict(zip(self.names, values))

    if self.is_blob:
        value = self._process_blob_value(value)

    return {self.name: value}
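# Illustrative call shapes for format_value above; 'field' and the attribute
# names are hypothetical. A manual-lookup value arrives as a dict whose
# 'existing_entries' slot either selects an existing parent row or is the
# sentinel '<new>' plus the columns needed to create one:
field.format_value({'existing_entries': '<new>', 'lookup_name': 'probe_a'})
# -> inserts {'lookup_name': 'probe_a'} into the parent table and returns it
field.format_value({'existing_entries': "('probe_a', 'shank_0')"})
# -> parses the tuple string into {'name_1': 'probe_a', 'name_2': 'shank_0'}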
def format_value(self, value):
    """format value"""
    if self.foreign_is_manuallookup:
        if value['existing_entries'] == '<new>':
            value.pop('existing_entries')
            try:
                self.foreign_table.insert1(
                    self.foreign_table_format_value(value)
                )
            except dj.DataJointError as e:
                raise dj.DataJointError(
                    "An error occurred while inserting into parent table"
                    f" {self.foreign_table.full_table_name}: {e}"
                )
            if self.aliased is None:
                value = value[self.name]
            else:
                value = value[self.aliased[self.name]]
        else:
            value = value['existing_entries']

    if self.attr.is_blob:
        value = self._process_blob_value(value)

    return value
@classmethod
def retrieve1(cls, key):
    '''
    retrieve related files for a given key
    '''
    self = cls()

    # >>> list(key.keys())
    # ['subject_id', 'session', 'trial', 'electrode_group', 'globus_alias']

    log.debug(key)

    lep, lep_sub, lep_dir = GlobusStorageLocation().local_endpoint
    log.info('local_endpoint: {}:{} -> {}'.format(lep, lep_sub, lep_dir))

    # get session related information needed for filenames/records
    sinfo = ((lab.WaterRestriction
              * lab.Subject.proj()
              * experiment.Session()
              * experiment.SessionTrial) & key).fetch1()

    h2o = sinfo['water_restriction_number']
    sdate = sinfo['session_date']
    eg = key['electrode_group']
    trial = key['trial']

    # build file locations:
    # fpat: base file pattern for this session's files
    # gbase: globus-url base path for this session's files
    fpat = '{}_{}_{}_g0_t{}'.format(h2o, sdate, eg, trial)
    gbase = '/'.join((h2o, str(sdate), str(eg), fpat))

    repname, rep, rep_sub = (GlobusStorageLocation() & key).fetch()[0]

    gsm = self.get_gsm()
    gsm.activate_endpoint(lep)  # XXX: cache this / prevent duplicate RPC?
    gsm.activate_endpoint(rep)  # XXX: cache this / prevent duplicate RPC?

    sfxmap = {'.imec.ap.bin': ArchivedRawEphysTrial.ArchivedApChannel,
              '.imec.ap.meta': ArchivedRawEphysTrial.ArchivedApMeta,
              '.imec.lf.bin': ArchivedRawEphysTrial.ArchivedLfChannel,
              '.imec.lf.meta': ArchivedRawEphysTrial.ArchivedLfMeta}

    for sfx, part in sfxmap.items():  # 'part' avoids shadowing 'cls'
        if part & key:
            log.debug('record found for {} & {}'.format(part.__name__, key))
            gname = '{}{}'.format(gbase, sfx)

            srcp = '{}:/{}/{}'.format(rep, rep_sub, gname)
            dstp = '{}:/{}/{}'.format(lep, lep_sub, gname)

            log.info('transferring {} to {}'.format(srcp, dstp))

            # XXX: check if exists 1st? (manually or via API copy-checksum)
            if not gsm.cp(srcp, dstp):
                emsg = "couldn't transfer {} to {}".format(srcp, dstp)
                log.error(emsg)
                raise dj.DataJointError(emsg)
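# A minimal sketch addressing the "check if exists 1st?" XXX above: wrap
# gsm.cp() in a bounded retry. Only the cp(src, dst) -> bool call observed in
# these snippets is assumed; the helper name and retry policy are hypothetical.
import time

def cp_with_retry(gsm, srcp, dstp, attempts=3, wait=5.0):
    """retry a globus transfer a few times before giving up"""
    for i in range(attempts):
        if gsm.cp(srcp, dstp):
            return True
        log.warning('transfer {} -> {} failed (attempt {}/{})'.format(
            srcp, dstp, i + 1, attempts))
        time.sleep(wait)
    return False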
def put(self, obj):
    if obj is None:
        return

    if isinstance(obj, str):
        obj = obj.strip().lower()
    else:
        raise dj.DataJointError(
            f"lookup name '{obj}' must be of type "
            f"'str' and not '{type(obj)}'."
        )

    if not obj.isidentifier():
        raise dj.DataJointError(
            f"lookup name '{obj}' is not an identifier; "
            "it contains characters besides alphanumerics and/or "
            "an underscore."
        )

    return obj
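# Behavior sketch for put() above (the 'normalizer' instance is hypothetical):
normalizer.put('  Kilosort2 ')  # -> 'kilosort2'
normalizer.put(None)            # -> None (silently ignored)
normalizer.put('bad-name')      # raises dj.DataJointError (not an identifier)
normalizer.put(42)              # raises dj.DataJointError (not a str)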
def _insert_new_params(tbl_class, param_set_name: str, params: dict):
    param_dict = {'param_set_name': param_set_name,
                  'params': params,
                  'param_set_hash': UUID(dict_to_hash(params))}
    q_param = tbl_class & {'param_set_hash': param_dict['param_set_hash']}

    if q_param:  # the specified param-set already exists
        pname = q_param.fetch1('param_set_name')
        if pname == param_set_name:  # the existing set has the same name: job done
            return
        else:  # same params under a different name: likely human error
            raise dj.DataJointError(
                'The specified param-set already exists - name: {}'.format(
                    pname))
    else:
        tbl_class.insert1(param_dict)
def register(self, **kwargs):
    for key, rel in kwargs.items():
        if key in self.store:
            continue  # already registered; keep processing the rest

        if isinstance(rel, (dj.Computed, dj.Imported, dj.Part)):
            raise dj.DataJointError(
                "Data should not be entered directly into Computed, "
                "Imported, or Part tables."
            )

        class ReturnValue(wtf.Form):
            _rel = rel

            @classmethod
            def append_field(cls, name, field):
                setattr(cls, name, field)
                return cls

            def insert(self2, replace=False):
                rel = self2._rel
                dat = {}
                for k, v in self2._fields.items():
                    # skip fields that were not specified, and the referrer
                    if v.data is not None and k != 'REFERRER':
                        if isinstance(v.data, (datetime.datetime,
                                               datetime.date)):
                            dat[k] = str(v.data)
                        else:
                            dat[k] = v.data
                rel.insert1(dat, replace=replace)

        ReturnValue.required = OrderedDict()
        for name, attr in rel.heading.attributes.items():
            ReturnValue.append_field(name, field_factory(attr))
            ReturnValue.required[name] = (not attr.nullable
                                          and attr.default is None)

        ReturnValue.append_field(
            'REFERRER', wtf.StringField(label='REFERRER',
                                        widget=HiddenInput()))

        self.store[key] = ReturnValue
def retrieve1(self, key):
    '''
    retrieve related files for a given key
    '''
    log.debug(key)

    # get remote file information
    linfo = (self * GlobusStorageLocation & key).fetch1()

    rep = linfo['globus_endpoint']
    rep_sub = linfo['globus_path']
    vfile = linfo['video_file_name']

    # get session related information needed for filenames/records
    sinfo = ((lab.WaterRestriction
              * lab.Subject.proj()
              * ((tracking.TrackingDevice
                  * tracking.Tracking.proj()) & key)
              * experiment.Session
              * experiment.SessionTrial) & key).fetch1()

    h2o = sinfo['water_restriction_number']
    sdate_iso = sinfo['session_date'].isoformat()  # YYYY-MM-DD

    # get local endpoint information
    globus_alias = 'raw-video'
    le = GlobusStorageLocation.local_endpoint(globus_alias)
    lep, lep_sub = le['endpoint'], le['endpoint_subdir']

    # build source/destination paths & initiate transfer
    gfile = '{}/{}/{}/{}'.format(h2o, sdate_iso, 'video', vfile)

    srcp = '{}:{}/{}'.format(rep, rep_sub, gfile)  # source path
    dstp = '{}:{}/{}'.format(lep, lep_sub, gfile)  # dest path

    gsm = self.get_gsm()
    gsm.activate_endpoint(lep)  # XXX: cache this / prevent duplicate RPC?
    gsm.activate_endpoint(rep)  # XXX: cache this / prevent duplicate RPC?

    log.info('transferring {} to {}'.format(srcp, dstp))

    if not gsm.cp(srcp, dstp):
        emsg = "couldn't transfer {} to {}".format(srcp, dstp)
        log.error(emsg)
        raise dj.DataJointError(emsg)
@classmethod
def local_endpoint(cls, globus_alias=None):
    '''
    return local endpoint for globus_alias from dj.config
    expects:

      globus.local_endpoints: {
          globus_alias: {
              'endpoint': uuid,        # UUID of local endpoint
              'endpoint_subdir': str,  # unix-style path within endpoint
              'endpoint_path': str     # corresponding local path
          }
      }
    '''
    le = dj.config.get('custom', {}).get('globus.local_endpoints', None)

    if le is None or globus_alias not in le:
        raise dj.DataJointError(
            "globus.local_endpoints for {} not configured".format(
                globus_alias))

    return le[globus_alias]
@classmethod
def local_endpoint(cls, globus_alias=None):
    '''
    return local endpoint for globus_alias from dj.config
    expects:

      globus.local_endpoints: {
          globus_alias: {
              'endpoint': uuid,        # UUID of local endpoint
              'endpoint_subdir': str,  # unix-style path within endpoint
              'endpoint_path': str     # corresponding local path
          }
      }
    '''
    custom = dj.config.get('custom', None)

    if custom and 'globus.local_endpoints' in custom:
        try:
            return custom['globus.local_endpoints'][globus_alias]
        except KeyError:
            pass

    raise dj.DataJointError(
        "globus.local_endpoints for {} not configured".format(globus_alias))
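# Example dj.config stanza matching the docstrings of both local_endpoint
# variants above (the endpoint UUID and paths are illustrative placeholders):
dj.config['custom'] = {
    'globus.local_endpoints': {
        'raw-ephys': {
            'endpoint': '00000000-0000-0000-0000-000000000000',
            'endpoint_subdir': 'map/raw-ephys',
            'endpoint_path': '/data/map/raw-ephys',
        }
    }
}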
@classmethod
def insert_new_params(cls, processing_method: str, paramset_idx: int,
                      paramset_desc: str, params: dict):
    param_dict = {
        'processing_method': processing_method,
        'paramset_idx': paramset_idx,
        'paramset_desc': paramset_desc,
        'params': params,
        'param_set_hash': UUID(dict_to_hash(params))
    }
    q_param = cls & {'param_set_hash': param_dict['param_set_hash']}

    if q_param:  # the specified param-set already exists
        pidx = q_param.fetch1('paramset_idx')
        if pidx == paramset_idx:  # the existing set has the same idx: job done
            return
        else:  # same params under a different idx: likely human error
            raise dj.DataJointError(
                'The specified param-set already exists'
                ' - paramset_idx: {}'.format(pidx))
    else:
        cls.insert1(param_dict)
@classmethod
def insert_new_params(cls, preprocess_method: str, paramset_idx: int,
                      paramset_desc: str, params: dict):
    param_dict = {
        "preprocess_method": preprocess_method,
        "paramset_idx": paramset_idx,
        "paramset_desc": paramset_desc,
        "params": params,
        "param_set_hash": dict_to_uuid(params),
    }
    q_param = cls & {"param_set_hash": param_dict["param_set_hash"]}

    if q_param:  # the specified param-set already exists
        pidx = q_param.fetch1("paramset_idx")
        if pidx == paramset_idx:  # the existing set has the same idx: job done
            return
        else:  # same params under a different idx: likely human error
            raise dj.DataJointError(
                "The specified param-set already exists"
                " - paramset_idx: {}".format(pidx))
    else:
        cls.insert1(param_dict)
@classmethod
def insert_new_params(cls, processing_method: str, paramset_idx: int,
                      paramset_desc: str, params: dict):
    param_dict = {
        'clustering_method': processing_method,
        'paramset_idx': paramset_idx,
        'paramset_desc': paramset_desc,
        'params': params,
        'param_set_hash': dict_to_uuid(params)
    }
    param_query = cls & {'param_set_hash': param_dict['param_set_hash']}

    if param_query:  # the specified param-set already exists
        existing_paramset_idx = param_query.fetch1('paramset_idx')
        if existing_paramset_idx == paramset_idx:
            # the existing set has the same paramset_idx: job done
            return
        else:
            # same params under a different paramset_idx: likely human error
            raise dj.DataJointError(
                'The specified param-set'
                ' already exists - paramset_idx: {}'.format(
                    existing_paramset_idx))
    else:
        cls.insert1(param_dict)
def commit(skey, sfiles):
    log.info('commit. skey: {}'.format(skey))

    if not sfiles:
        log.info('commit skipping {}. no files in set'.format(skey))
        return

    # log.debug('sfiles: {}'.format(sfiles))

    h2o, sdate, ftypes = set(), set(), set()

    dftmap = {}  # device:file:trial via load_campath mapping files
    dvfmap = defaultdict(lambda: defaultdict(list))  # device:video:file
    dtfmap = defaultdict(lambda: defaultdict(list))  # device:trial:file

    for s in sfiles:
        if s['file_type'] == 'tracking-video-trial':
            dvfmap[s['position']][s['video']].append(s)
            h2o.add(s['water_restriction_number'])
            sdate.add(s['session_date'])
            ftypes.add(s['file_type'])

        if s['file_type'] == 'tracking-video-map':
            # xfer & load camera:trial map. ex: dl55_20190108_side.txtb
            fsp = s['file_subpath']
            lsp = '/tmp/' + s['file_subpath'].split('/')[-1]

            srcp = '{}:{}/{}'.format(rep, rep_sub, fsp)
            dstp = '{}:{}/{}'.format(lep, lep_sub, lsp)

            log.info('transferring {} to {}'.format(srcp, dstp))

            if not gsm.cp(srcp, dstp):  # XXX: check if exists 1st?
                emsg = "couldn't transfer {} to {}".format(srcp, dstp)
                log.error(emsg)
                raise dj.DataJointError(emsg)

            lfname = lep_dir + lsp  # local filesystem copy location
            dftmap[s['position']] = TrackingIngest.load_campath(lfname)

    if len(h2o) != 1 or len(sdate) != 1:
        log.info('skipping. bad h2o {} or session date {}'.format(
            h2o, sdate))
        return

    h2o, sdate = next(iter(h2o)), next(iter(sdate))

    for d in dvfmap:
        if d in dftmap:  # remap video no -> trial
            dtfmap[d] = {dftmap[d][v]: [dict(f, trial=dftmap[d][v])
                                        for f in dvfmap[d][v]]
                         for v in dvfmap[d]}
        else:  # assign video no -> trial
            dtfmap[d] = {k: [dict(f, trial=f['video']) for f in v]
                         for k, v in dvfmap[d].items()}

    # DataSet
    ds_type = 'tracking-video'
    ds_name = '{}_{}_{}'.format(h2o, sdate, ds_type)
    ds_key = {'dataset_name': ds_name, 'globus_alias': globus_alias}

    if (DataSet & ds_key):
        log.info('DataSet: {} already exists. Skipping.'.format(ds_key))
        return

    DataSet.insert1({**ds_key, 'dataset_type': ds_type},
                    allow_direct_insert=True)

    # ArchivedSession
    as_key = {k: v for k, v in smap[skey].items()
              if k in ArchivedSession.primary_key}

    ArchivedSession.insert1({**as_key, 'globus_alias': globus_alias},
                            allow_direct_insert=True,
                            skip_duplicates=True)

    for d in dtfmap:
        # ArchivedTrackingVideo
        atv_key = {**as_key, **ds_key, 'tracking_device': tpos_dev[d]}

        ArchivedTrackingVideo.insert1(atv_key, allow_direct_insert=True)

        for t in dtfmap[d]:
            for f in dtfmap[d][t]:
                DataSet.PhysicalFile.insert1({**ds_key, **f},
                                             allow_direct_insert=True,
                                             ignore_extra_fields=True)

                ArchivedTrackingVideo.TrialVideo.insert1(
                    {**atv_key, **ds_key,
                     'trial': t,
                     'file_subpath': f['file_subpath']},
                    allow_direct_insert=True)
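# Toy illustration of the video -> trial remapping above (values are
# hypothetical). With a camera:trial map file present for device 'side':
#   dftmap = {'side': {0: 10, 1: 11}}      # video no -> trial no
#   dvfmap = {'side': {0: [f0], 1: [f1]}}  # video no -> file records
# the remap yields:
#   dtfmap = {'side': {10: [dict(f0, trial=10)],
#                      11: [dict(f1, trial=11)]}}
# Without a map file, video numbers are taken as trial numbers directly.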
def _insert(self, formatted_dict, _id=None, primary_dict=None,
            check_reserved=True, override_update_truth=False, **kwargs):
    """insert helper function"""
    insert_dict = {}

    for key, value in formatted_dict.items():
        if key in self.fields:
            insert_dict.update(self.fields[key].format_value(value))

    if _id is None or kwargs.get('replace', False):
        truth = True
    elif len(self.table & _id) == 0:
        if override_update_truth:
            truth = True
        else:
            raise dj.DataJointError(
                f'Entry {_id} does not exist; cannot update.')
    else:
        truth = False

    if primary_dict is not None:
        insert_dict = {**primary_dict, **insert_dict}

    primary_dict = {key: value for key, value in insert_dict.items()
                    if key in self.table.primary_key}

    jobs = config['schemata'][self.table.database].schema.jobs

    if check_reserved:
        reserved = (jobs & {'table_name': self.table.table_name,
                            'key_hash': key_hash(primary_dict)})
        if reserved:
            raise dj.DataJointError(
                f"Entry {primary_dict} has been reserved for table "
                f"{self.table.full_table_name}; "
                "change your primary key values.")

    if truth:
        try:
            self.table.insert1(insert_dict, **kwargs)
        except dj.DataJointError as e:
            raise dj.DataJointError(
                "An error occurred while inserting into table "
                f"{self.table.full_table_name}: {e}")
    else:  # editing entries safely
        # DO NOT remove primary keys with the new update1 method
        insert_dict = {
            key: value for key, value in insert_dict.items()
            if (
                # key not in self.table.primary_key
                # skip updating non-specified files
                # TODO fix for uploading files
                not (value is None
                     and (self.fields[key].attr.is_blob
                          or self.fields[key].attr.is_attachment)))
        }
        if insert_dict:
            try:
                self.table.update1(insert_dict)
            except dj.DataJointError as e:
                raise dj.DataJointError(
                    "An error occurred while updating table "
                    f"{self.table.full_table_name}: {e}")

    return primary_dict
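# The reservation check above keys DataJoint's jobs table by an md5 hash of
# the primary-key mapping. A minimal sketch (key values hypothetical):
from datajoint.hash import key_hash

key_hash({'subject_id': 432572, 'session': 1})
# -> a 32-character md5 hex digest, comparable against jobs.key_hash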
def make(self, key):
    """
    discover files in local endpoint and transfer/register
    """
    log.info('ArchivedVideoFile.make(): {}'.format(key))

    # {'tracking_device': 'Camera 0', 'subject_id': 432572, 'session': 1}

    globus_alias = 'raw-video'
    le = GlobusStorageLocation.local_endpoint(globus_alias)
    lep, lep_sub, lep_dir = (le['endpoint'], le['endpoint_subdir'],
                             le['endpoint_path'])

    re = (GlobusStorageLocation & {'globus_alias': globus_alias}).fetch1()
    rep, rep_sub = re['globus_endpoint'], re['globus_path']

    log.info('local_endpoint: {}:{} -> {}'.format(lep, lep_sub, lep_dir))
    log.info('remote_endpoint: {}:{}'.format(rep, rep_sub))

    h2o = (lab.WaterRestriction & key).fetch1('water_restriction_number')

    session = (experiment.Session & key).fetch1()
    sdate = session['session_date']
    sdate_sml = "{}{:02d}{:02d}".format(sdate.year, sdate.month, sdate.day)

    dev = (tracking.TrackingDevice & key).fetch1()

    trls = (experiment.SessionTrial & key).fetch(order_by='trial',
                                                 as_dict=True)

    tracking_ingest = self.get_ingest()

    tdev = dev['tracking_device']
    tpos = dev['tracking_position']

    camtrial = '{}_{}_{}.txt'.format(h2o, sdate_sml, tpos)
    vbase = pathlib.Path(lep_dir, h2o, sdate_sml, 'video')
    campath = vbase / camtrial

    if not campath.exists():  # XXX: uses 1st found
        log.warning('trial map {} n/a! skipping.'.format(campath))
        return

    log.info('loading trial map: {}'.format(campath))
    vmap = {v: k for k, v in
            tracking_ingest.TrackingIngest.load_campath(campath).items()}
    log.debug('loaded video map: {}'.format(vmap))

    # add ArchivedSession
    as_key = {k: v for k, v in key.items()
              if k in experiment.Session.primary_key}
    as_rec = {**as_key, 'globus_alias': globus_alias}

    ArchivedSession.insert1(as_rec, allow_direct_insert=True,
                            skip_duplicates=True)

    # add DataSet
    ds_type = 'tracking-video'
    ds_name = '{}_{}_{}_{}'.format(h2o, sdate.isoformat(), ds_type, tpos)
    ds_key = {'globus_alias': globus_alias, 'dataset_name': ds_name}
    ds_rec = {**ds_key, 'dataset_type': ds_type}

    DataSet.insert1(ds_rec, allow_direct_insert=True)

    # add ArchivedVideoTracking
    vt_key = {**as_key, 'tracking_device': tdev}
    vt_rec = {**vt_key, 'globus_alias': globus_alias,
              'dataset_name': ds_name}

    self.insert1(vt_rec)

    filetype = 'tracking-video-trial'

    for t in trls:
        trial = t['trial']
        log.info('.. tracking trial {} ({})'.format(trial, t))

        if t['trial'] not in vmap:
            log.warning('trial {} not in video map. skipping!'.format(t))
            continue

        vmatch = '{}_{}_{}-*'.format(h2o, tpos, vmap[trial])
        log.debug('vbase: {}, vmatch: {}'.format(vbase, vmatch))

        vglob = list(vbase.glob(vmatch))

        if len(vglob) != 1:
            emsg = 'incorrect videos found in {}: {}'.format(vbase, vglob)
            log.warning(emsg)
            raise dj.DataJointError(emsg)

        vfile = vglob[0].name
        gfile = '{}/{}/{}/{}'.format(h2o, sdate_sml, 'video',
                                     vfile)  # subpath

        srcp = '{}:{}/{}'.format(lep, lep_sub, gfile)  # source path
        dstp = '{}:{}/{}'.format(rep, rep_sub, gfile)  # dest path

        gsm = self.get_gsm()
        gsm.activate_endpoint(lep)  # XXX: cache / prevent duplicate RPC?
        gsm.activate_endpoint(rep)  # XXX: cache / prevent duplicate RPC?
        log.info('transferring {} to {}'.format(srcp, dstp))

        if not gsm.cp(srcp, dstp):
            emsg = "couldn't transfer {} to {}".format(srcp, dstp)
            log.error(emsg)
            raise dj.DataJointError(emsg)

        pf_key = {**ds_key, 'file_subpath': vfile}
        pf_rec = {**pf_key, 'file_type': filetype}

        DataSet.PhysicalFile.insert1({**pf_rec}, allow_direct_insert=True)

        trk_key = {k: v for k, v in {**key, 'trial': trial}.items()
                   if k in experiment.SessionTrial.primary_key}

        tv_rec = {**vt_key, **trk_key, **pf_key}

        self.TrialVideo.insert1({**tv_rec})
def make(self, key):
    """
    discover files in local endpoint and transfer/register
    """
    log.debug(key)

    globus_alias = 'raw-ephys'
    le = GlobusStorageLocation.local_endpoint(globus_alias)
    lep, lep_sub, lep_dir = (le['endpoint'], le['endpoint_subdir'],
                             le['endpoint_path'])

    re, rep, rep_sub = (GlobusStorageLocation()
                        & {'globus_alias': globus_alias}).fetch1().values()

    log.info('local_endpoint: {}:{} -> {}'.format(lep, lep_sub, lep_dir))

    # get session related information needed for filenames/records
    sinfo = (lab.WaterRestriction * lab.Subject.proj()
             * experiment.Session() & key).fetch1()
    tinfo = ((lab.WaterRestriction * lab.Subject.proj()
              * experiment.Session() * experiment.SessionTrial)
             & key).fetch()

    h2o = sinfo['water_restriction_number']
    sdate = sinfo['session_date']

    subdir = pathlib.Path(h2o, str(sdate).replace('-', ''))  # + probeno
    lep_subdir = pathlib.Path(lep_dir, subdir)

    probechoice = [str(i) for i in range(1, 10)]  # XXX: hardcoded

    file_globs = {i['file_glob']: i['file_type']
                  for i in FileType & "file_type like 'ephys%%'"}

    # process each probe folder
    for lep_probedir in lep_subdir.glob('*'):
        lep_probe = str(lep_probedir.relative_to(lep_subdir))
        if lep_probe not in probechoice:
            log.info('skipping lep_probedir: {} - unexpected name'.format(
                lep_probedir))
            continue

        lep_matchfiles = {}
        lep_probefiles = lep_probedir.glob('*.*')

        for pf in lep_probefiles:
            pfbase = pf.relative_to(lep_probedir)
            pfmatch = {k: pfbase.match(k) for k in file_globs}
            if any(pfmatch.values()):
                log.debug('found valid file: {}'.format(pf))
                lep_matchfiles[pf] = tuple(k for k in pfmatch if pfmatch[k])
            else:
                log.debug('skipping non-match file: {}'.format(pf))
                continue

        # build/validate file records
        if not all([len(lep_matchfiles[i]) == 1 for i in lep_matchfiles]):
            # TODO: handle trial + concatenated match case...
            log.warning('files matched multiple types: {}'.format(
                lep_matchfiles))
            continue

        type_to_file = {file_globs[lep_matchfiles[mf][0]]: mf
                        for mf in lep_matchfiles}

        ds_key, ds_name = None, None
        ds_files, ds_trials = [], []

        if all(['trial' in t for t in type_to_file]):

            dataset_type = 'ephys-raw-trialized'

            ds_name = '{}_{}_{}'.format(h2o, sdate.isoformat(),
                                        dataset_type)
            ds_key = {'dataset_name': ds_name,
                      'globus_storage_location': globus_alias}

            for t in type_to_file:
                fsp = type_to_file[t].relative_to(lep_dir)
                dsf = {**ds_key, 'file_subpath': str(fsp)}

                # e.g.: 'tw34_g0_t0.imec.ap.meta' -> *_t(trial).*
                trial = int(fsp.name.split('_t')[1].split('.')[0])

                if trial not in tinfo['trial']:
                    log.warning('unknown trial file: {}. skipping'.format(
                        dsf))
                    continue

                ds_trials.append({**dsf, 'trial': trial})
                ds_files.append({**dsf, 'file_type': t})

        elif all(['concat' in t for t in type_to_file]):

            dataset_type = 'ephys-raw-continuous'

            ds_name = '{}_{}_{}'.format(h2o, sdate.isoformat(),
                                        dataset_type)
            ds_key = {'dataset_name': ds_name,
                      'globus_storage_location': globus_alias}

            for t in type_to_file:
                fsp = type_to_file[t].relative_to(lep_dir)
                ds_files.append({**ds_key,
                                 'file_subpath': str(fsp),
                                 'file_type': t})

        else:
            log.warning("couldn't determine dataset type for {}".format(
                lep_probedir))
            continue

        # transfer files
        gsm = self.get_gsm()
        gsm.activate_endpoint(lep)  # XXX: cache / prevent duplicate RPC?
        gsm.activate_endpoint(rep)  # XXX: cache / prevent duplicate RPC?
        DataSet.insert1({**ds_key, 'dataset_type': dataset_type},
                        allow_direct_insert=True)

        for f in ds_files:
            fsp = f['file_subpath']
            srcp = '{}:{}/{}'.format(lep, lep_sub, fsp)
            dstp = '{}:{}/{}'.format(rep, rep_sub, fsp)

            log.info('transferring {} to {}'.format(srcp, dstp))

            # XXX: check if exists 1st?
            if not gsm.cp(srcp, dstp):
                emsg = "couldn't transfer {} to {}".format(srcp, dstp)
                log.error(emsg)
                raise dj.DataJointError(emsg)

            DataSet.PhysicalFile.insert1({**ds_key, **f},
                                         allow_direct_insert=True)

        # add records
        ArchivedSession.insert1(
            {**key, 'globus_storage_location': globus_alias},
            skip_duplicates=True, allow_direct_insert=True)

        ArchivedRawEphys.insert1(
            {**key, **ds_key, 'probe_folder': int(lep_probe)},
            allow_direct_insert=True)

        if dataset_type == 'ephys-raw-trialized':
            ArchivedRawEphys.ArchivedTrials.insert(
                [{**key, **t} for t in ds_trials],
                allow_direct_insert=True)
def make(self, key):
    '''
    determine available files from local endpoint and publish
    (create database records and transfer to globus)
    '''

    # >>> list(key.keys())
    # ['subject_id', 'session', 'trial', 'electrode_group', 'globus_alias']

    log.debug(key)

    globus_alias = 'raw-ephys'
    le = GlobusStorageLocation.local_endpoint(globus_alias)
    lep, lep_sub, lep_dir = (le['endpoint'], le['endpoint_subdir'],
                             le['endpoint_path'])

    log.info('local_endpoint: {}:{} -> {}'.format(lep, lep_sub, lep_dir))

    # get session related information needed for filenames/records
    sinfo = ((lab.WaterRestriction
              * lab.Subject.proj()
              * experiment.Session()
              * experiment.SessionTrial) & key).fetch1()

    h2o = sinfo['water_restriction_number']
    sdate = sinfo['session_date']
    eg = key['electrode_group']
    trial = key['trial']

    # build file locations:
    # subdir - common subdirectory for globus/native filesystem
    # fpat: base file pattern for this session's files
    # fbase: filesystem base path for this session's files
    # gbase: globus-url base path for this session's files
    subdir = os.path.join(h2o, str(sdate), str(eg))
    fpat = '{}_{}_{}_g0_t{}'.format(h2o, sdate, eg, trial)
    fbase = os.path.join(lep_dir, subdir, fpat)
    gbase = '/'.join((h2o, str(sdate), str(eg), fpat))

    # check for existence of actual files & use to build xfer list
    log.debug('checking {}'.format(fbase))

    ffound = []
    ftypes = RawEphysFileTypes.contents
    for ft in ftypes:
        fname = '{}{}'.format(fbase, ft[1])
        gname = '{}{}'.format(gbase, ft[1])
        if not os.path.exists(fname):
            log.debug('... {}: not found'.format(fname))
            continue

        log.debug('... {}: found'.format(fname))
        ffound.append((ft, gname,))

    # if files are found, transfer and create publication schema records
    if not len(ffound):
        log.info('no files found for key')
        return

    log.info('found files for key: {}'.format([f[1] for f in ffound]))

    repname, rep, rep_sub = (GlobusStorageLocation()
                             & {'globus_alias': globus_alias}).fetch(
                                 limit=1)[0]

    gsm = self.get_gsm()
    gsm.activate_endpoint(lep)  # XXX: cache this / prevent duplicate RPC?
    gsm.activate_endpoint(rep)  # XXX: cache this / prevent duplicate RPC?

    if not self & key:
        log.info('ArchivedRawEphysTrial.insert1()')
        self.insert1({**key, 'globus_alias': globus_alias})

    ftmap = {'ap-30kHz': ArchivedRawEphysTrial.ArchivedApChannel,
             'ap-30kHz-meta': ArchivedRawEphysTrial.ArchivedApMeta,
             'lf-2.5kHz': ArchivedRawEphysTrial.ArchivedLfChannel,
             'lf-2.5kHz-meta': ArchivedRawEphysTrial.ArchivedLfMeta}

    for ft, gname in ffound:  # XXX: transfer/insert could be batched
        ft_class = ftmap[ft[0]]
        if not ft_class & key:
            srcp = '{}:/{}/{}'.format(lep, lep_sub, gname)
            dstp = '{}:/{}/{}'.format(rep, rep_sub, gname)

            log.info('transferring {} to {}'.format(srcp, dstp))

            # XXX: check if exists 1st? (manually or via API copy-checksum)
            if not gsm.cp(srcp, dstp):
                emsg = "couldn't transfer {} to {}".format(srcp, dstp)
                log.error(emsg)
                raise dj.DataJointError(emsg)

            log.info('ArchivedRawEphysTrial.{}.insert1()'.format(
                ft_class.__name__))

            ft_class.insert1(key)
def make(self, key):
    '''
    determine available files from local endpoint and publish
    (create database records and transfer to globus)
    '''
    log.info('ArchivedVideoFile.make(): {}'.format(key))

    globus_alias = 'raw-video'
    le = GlobusStorageLocation.local_endpoint(globus_alias)
    lep, lep_sub, lep_dir = (le['endpoint'], le['endpoint_subdir'],
                             le['endpoint_path'])

    log.info('local_endpoint: {}:{} -> {}'.format(lep, lep_sub, lep_dir))

    h2o = (lab.WaterRestriction & key).fetch1('water_restriction_number')
    trial = key['trial']

    session = (experiment.Session & key).fetch1()
    sdate = session['session_date']
    sdate_iso = sdate.isoformat()  # YYYY-MM-DD
    sdate_sml = "{}{:02d}{:02d}".format(sdate.year, sdate.month, sdate.day)

    trk = (tracking.TrackingDevice
           * (tracking.Tracking & key).proj()).fetch1()

    tdev = trk['tracking_device']  # NOQA: notused
    tpos = trk['tracking_position']

    camtrial = '{}_{}_{}.txt'.format(h2o, sdate_sml, tpos)

    tracking_ingest = self.get_ingest()

    tpaths = tracking_ingest.TrackingDataPath.fetch(as_dict=True)

    campath = None
    tbase, vbase = None, None  # tracking, video session base paths

    for p in tpaths:
        tdat = p['tracking_data_path']

        tbase = pathlib.Path(tdat, h2o, sdate_iso, 'tracking')
        vbase = pathlib.Path(tdat, h2o, sdate_iso, 'video')

        campath = tbase / camtrial

        log.debug('trying camera position trial map: {}'.format(campath))

        if campath.exists():  # XXX: uses 1st found
            break

        log.debug('tracking path {} n/a - skipping'.format(tbase))
        campath = None

    if not campath:
        log.warning('tracking data not found for {}'.format(tpos))
        return

    tmap = tracking_ingest.TrackingIngest.load_campath(campath)

    if trial not in tmap:
        log.warning('nonexistent trial {}.. skipping'.format(trial))
        return

    repname, rep, rep_sub = (GlobusStorageLocation
                             & {'globus_alias': globus_alias}).fetch(
                                 limit=1)[0]

    vmatch = '{}_{}_{}-*'.format(h2o, tpos, tmap[trial])
    vglob = list(vbase.glob(vmatch))

    if len(vglob) != 1:  # XXX: error instead of warning?
        log.warning('expected exactly one video, found: {}'.format(vglob))
        return

    vfile = vglob[0].name
    gfile = '{}/{}/{}/{}'.format(h2o, sdate_iso, 'video', vfile)  # subpath

    srcp = '{}:{}/{}'.format(lep, lep_sub, gfile)  # source path
    dstp = '{}:{}/{}'.format(rep, rep_sub, gfile)  # dest path

    gsm = self.get_gsm()
    gsm.activate_endpoint(lep)  # XXX: cache this / prevent duplicate RPC?
    gsm.activate_endpoint(rep)  # XXX: cache this / prevent duplicate RPC?

    log.info('transferring {} to {}'.format(srcp, dstp))

    if not gsm.cp(srcp, dstp):
        emsg = "couldn't transfer {} to {}".format(srcp, dstp)
        log.error(emsg)
        raise dj.DataJointError(emsg)

    self.insert1({**key,
                  'globus_alias': globus_alias,
                  'video_file_name': vfile})