def writeThumbnailImage(self, filename, year, md5):
    """Copy the first channel's lowest-resolution DZI tile as the scan thumbnail.

    :param filename: source file to remove if thumbnail generation fails
    :param year: year sub-directory of the scan
    :param md5: md5 identifier of the scan
    :return: (thumbnail_url, '&'-joined channel ImageProperties URLs) on
        success, or (None, None) on failure (after logging, mailing a
        failure report, and removing `filename`).
    """
    try:
        scanDir = '%s/%s/%s' % (self.dzi, year, md5)
        # every sub-directory of the scan directory is a channel
        channels = []
        for channel in os.listdir(scanDir):
            if os.path.isdir('%s%s%s' % (scanDir, os.sep, channel)):
                channels.append(channel)
        outdir = '%s/%s' % (self.thumbnails, year)
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        # 0/0_0.jpg is the lowest-resolution tile of the first channel
        shutil.copyfile(
            '%s/%s/%s/%s/0/0_0.jpg' % (self.dzi, year, md5, channels[0]),
            '%s/%s.jpg' % (outdir, md5))
        thumbnail = '/thumbnails/%s/%s.jpg' % (urlquote(year), urlquote(md5))
        urls = []
        for channel in channels:
            urls.append('url=/data/%s/%s/%s/ImageProperties.xml' % (year, md5, channel))
        return (thumbnail, '&'.join(urls))
    except Exception:
        # was a bare "except:"; narrowed so SystemExit/KeyboardInterrupt propagate
        et, ev, tb = sys.exc_info()
        self.logger.error('got unexpected exception "%s"' % str(ev))
        self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
        self.sendMail('FAILURE Tiles: write thumbnail ERROR',
                      '%s\n' % str(traceback.format_exception(et, ev, tb)))
        os.remove(filename)
        return (None, None)
def getRowFilter(self, row):
    """Build ERMrest filter predicates from the first key whose columns `row` has.

    Raises ValueError when no known key matches the row.
    """
    matching_key = next(
        (k for k in self.find_keys() if self.row_has_key(row, k)), None)
    if matching_key is None:
        raise ValueError("can't find appropriate key")
    return [
        "{k}={v}".format(k=urlquote(col), v=urlquote(row[col]))
        for col in matching_key.get('unique_columns')
    ]
def getCatalogTable(asset_mapping, metadata_dict=None):
    """Resolve the target catalog table as 'schema:table' (URL-quoted).

    Prefers the asset_mapping's "target_table"; falls back to case-insensitive
    "schema"/"table" entries in metadata_dict.

    :raises ValueError: when neither source yields both names.
    """
    schema_name, table_name = asset_mapping.get('target_table', [None, None])
    if not (schema_name and table_name):
        # fall back to metadata; tolerate metadata_dict=None
        # (previously raised AttributeError on .items())
        metadata_dict_lower = {
            k.lower(): v for k, v in (metadata_dict or {}).items()
        }
        schema_name = metadata_dict_lower.get("schema")
        table_name = metadata_dict_lower.get("table")
    if not (schema_name and table_name):
        raise ValueError(
            "Unable to determine target catalog table for asset type.")
    return '%s:%s' % (urlquote(schema_name), urlquote(table_name))
def deleteFromHatrac(self):
    """Delete pending files from hatrac and mark them done in Delete_Hatrac."""
    # select rows not yet processed: Processing_Status "in progress" or NULL
    url = '/entity/Common:Delete_Hatrac/Hatrac_Deleted=FALSE/Processing_Status=in%20progress;Processing_Status::null::'
    resp = self.catalog.get(url)
    resp.raise_for_status()
    files = resp.json()
    fileids = []
    for f in files:
        fileids.append((f['Hatrac_URI'], f['RID']))
    self.logger.debug('Deleting from hatrac %d files(s).' % (len(fileids)))
    for hatrac_uri, rid in fileids:
        try:
            self.store.del_obj(hatrac_uri)
            self.logger.debug('SUCCEEDED deleted from hatrac the "%s" file.' % (hatrac_uri))
            # mark the row deleted/successful via an attributegroup update
            columns = ["Hatrac_Deleted", "Processing_Status"]
            columns = ','.join([urlquote(col) for col in columns])
            url = '/attributegroup/Common:Delete_Hatrac/RID;%s' % (columns)
            obj = {'RID': rid, 'Hatrac_Deleted': True, 'Processing_Status': 'success'}
            self.catalog.put(url, json=[obj])
            self.logger.debug('SUCCEEDED updated the Common:Delete_Hatrac table entry for the Hatrac URL: "%s".' % (hatrac_uri))
        except Exception as e:
            # per-file failure: log and record it, continue with remaining files
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.reportFailure(rid, str(e))
def deleteFromHatrac(self):
    """Delete pending files from hatrac and mark them done in Delete_Hatrac."""
    # select rows not yet processed: Processing_Status "in progress" or NULL
    url = '/entity/Common:Delete_Hatrac/Hatrac_Deleted=FALSE/Processing_Status=in%20progress;Processing_Status::null::'
    resp = self.catalog.get(url)
    resp.raise_for_status()
    files = resp.json()
    fileids = []
    for f in files:
        fileids.append((f['Hatrac_URI'], f['RID']))
    self.logger.debug('Deleting from hatrac %d files(s).' % (len(fileids)))
    for hatrac_uri, rid in fileids:
        try:
            self.store.del_obj(hatrac_uri)
            self.logger.debug(
                'SUCCEEDED deleted from hatrac the "%s" file.' % (hatrac_uri))
            # mark the row deleted/successful via an attributegroup update
            columns = ["Hatrac_Deleted", "Processing_Status"]
            columns = ','.join([urlquote(col) for col in columns])
            url = '/attributegroup/Common:Delete_Hatrac/RID;%s' % (columns)
            obj = {
                'RID': rid,
                'Hatrac_Deleted': True,
                'Processing_Status': 'success'
            }
            self.catalog.put(url, json=[obj])
            self.logger.debug(
                'SUCCEEDED updated the Common:Delete_Hatrac table entry for the Hatrac URL: "%s".'
                % (hatrac_uri))
        except Exception as e:
            # per-file failure: log and record it, continue with remaining files
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.reportFailure(rid, str(e))
def exists(self, tablename):
    """Return True if `tablename` exists in the ermrest catalog, else False.

    Any HTTP error other than 404 is re-raised.
    """
    # check if table exists in ermrest catalog
    assert isinstance(self._ermrest_catalog, ErmrestCatalog)
    sname, tname = self._parse_table_name(tablename)
    try:
        path = '/schema/%s/table/%s' % (urlquote(sname), urlquote(tname))
        r = self._ermrest_catalog.get(path)
        r.raise_for_status()
        resp = r.json()
        return resp is not None
    except HTTPError as e:
        if e.response.status_code == 404:
            # table (or schema) not found
            return False
        # re-raise preserving the original traceback (was "raise e")
        raise
def put_row_update(self, update_row):
    """PUT one updated row to ERMrest, excluding the ID/RID key columns."""
    excluded = ('ID', 'RID')
    target_cols = ','.join(
        urlquote(col, safe='') for col in update_row if col not in excluded)
    self.catalog.put('%s;%s' % (self.unit.put_update_baseurl, target_cols),
                     json=[update_row])
    # trace the update for operator visibility
    sys.stderr.write('\nupdated in ERMrest: %s' % json.dumps(update_row, indent=2))
def get_data():
    """Yield batches of CFDE:<resource> rows in RID order, paging via @after."""
    base = '/entity/CFDE:%s@sort(RID)' % urlquote(resource['name'])
    r = self.catalog.get('%s?limit=%d' % (base, self.batch_size))
    rows = r.json()
    yield rows
    while rows:
        # continue after the last RID seen in the previous batch
        last = rows[-1]['RID']
        r = self.catalog.get(
            '%s@after(%s)?limit=%d' % (base, urlquote(last), self.batch_size))
        rows = r.json()
        yield rows
def _urlEncodeMetadata(self, safe_overrides=None):
    """Add a "<key>_urlencoded" variant for every plain metadata entry."""
    safe_overrides = safe_overrides or dict()
    urlencoded = {
        k + "_urlencoded": urlquote(str(v), safe_overrides.get(k, ""))
        for k, v in self.metadata.items()
        if not k.endswith("_urlencoded")
    }
    self._updateFileMetadata(urlencoded)
def _urlencode_envars(self, safe_overrides=None):
    """Add a "<key>_urlencoded" variant for every plain environment variable."""
    safe_overrides = safe_overrides or dict()
    urlencoded = {
        k + "_urlencoded": urlquote(str(v), safe_overrides.get(k, ""))
        for k, v in self.envars.items()
        if not k.endswith("_urlencoded")
    }
    self.envars.update(urlencoded)
def cleanup_restored_catalog(self):
    """Remove restore bookkeeping from the destination catalog."""
    # cleanup restore state markers
    logging.info("Cleaning up restore state...")
    dst_model = self.dst_catalog.getCatalogModel()
    for sname, schema in dst_model.schemas.items():
        for tname, table in schema.tables.items():
            annotation_uri = "/schema/%s/table/%s/annotation/%s" % (
                urlquote(sname), urlquote(tname),
                urlquote(self.RESTORE_STATE_URL))
            try:
                self.dst_catalog.delete(annotation_uri)
            except Exception as e:
                # best-effort cleanup: warn and keep going
                logging.warning(
                    "Unable to cleanup restore state marker annotation %s: %s"
                    % (annotation_uri, format_exception(e)))
                continue
    # truncate restore history
    if self.truncate_after:
        logging.info("Truncating restore history...")
        snaptime = self.dst_catalog.get("/").json()["snaptime"]
        self.dst_catalog.delete("/history/,%s" % urlquote(snaptime))
def on_actionRefresh_triggered(self):
    """Qt slot: reload the worklist for the logged-in identity."""
    if not self.identity:
        self.updateStatus("Unable to get worklist -- not logged in.")
        return
    # busy-cursor + disabled controls until onRefreshResult fires
    qApp.setOverrideCursor(Qt.WaitCursor)
    self.disableControls()
    self.updateStatus("Refreshing worklist...")
    queryTask = CatalogQueryTask(self.catalog)
    queryTask.status_update_signal.connect(self.onRefreshResult)
    # curators in curator mode see the full worklist; others only their own
    if self.is_curator() and self.curator_mode:
        queryTask.query(WORKLIST_CURATOR_QUERY)
    else:
        queryTask.query(WORKLIST_QUERY % urlquote(self.identity, ""))
def _catalogRecordUpdate(self, catalog_table, old_row, new_row):
    """Update a catalog row via an attributegroup PUT keyed on the old values.

    :param catalog_table: schema:table path fragment of the target table
    :param old_row: current column values (used as the correlation key)
    :param new_row: replacement column values (same column set as old_row)
    :return: the JSON response of the PUT, or None if cancelled
    :raises CatalogUpdateError: on any failure during the update
    """
    if self.cancelled:
        return None
    try:
        keys = sorted(list(new_row.keys()))
        old_keys = sorted(list(old_row.keys()))
        if keys != old_keys:
            raise RuntimeError(
                "Cannot update catalog - "
                "new row column list and old row column list do not match: New: %s != Old: %s"
                % (keys, old_keys))
        # oN columns carry the old (key) values, nN the replacement values
        combined_row = {
            'o%d' % i: old_row[keys[i]] for i in range(len(keys))
        }
        combined_row.update(
            {'n%d' % i: new_row[keys[i]] for i in range(len(keys))})
        update_uri = '/attributegroup/%s/%s;%s' % (
            catalog_table,
            ','.join(["o%d:=%s" % (i, urlquote(keys[i])) for i in range(len(keys))]),
            ','.join(["n%d:=%s" % (i, urlquote(keys[i])) for i in range(len(keys))]))
        logging.debug(
            "Attempting catalog record update [%s] with data: %s"
            % (update_uri, json.dumps(combined_row)))
        return self.catalog.put(update_uri, json=[combined_row]).json()
    except Exception as e:
        # was a bare "except:" that also shadowed the traceback module with a
        # local variable; narrowed, and the cause is now chained for debugging
        raise CatalogUpdateError(format_exception(e)) from e
def writeThumbnailImage(self, filename, year, md5):
    """Copy the first channel's lowest-resolution DZI tile as the scan thumbnail.

    :param filename: source file to remove if thumbnail generation fails
    :param year: year sub-directory of the scan
    :param md5: md5 identifier of the scan
    :return: (thumbnail_url, '&'-joined channel ImageProperties URLs) on
        success, or (None, None) on failure (after logging, mailing a
        failure report, and removing `filename`).
    """
    try:
        scanDir = '%s/%s/%s' % (self.dzi, year, md5)
        # every sub-directory of the scan directory is a channel
        channels = []
        for channel in os.listdir(scanDir):
            if os.path.isdir('%s%s%s' % (scanDir, os.sep, channel)):
                channels.append(channel)
        outdir = '%s/%s' % (self.thumbnails, year)
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        # 0/0_0.jpg is the lowest-resolution tile of the first channel
        shutil.copyfile(
            '%s/%s/%s/%s/0/0_0.jpg' % (self.dzi, year, md5, channels[0]),
            '%s/%s.jpg' % (outdir, md5))
        thumbnail = '/thumbnails/%s/%s.jpg' % (urlquote(year), urlquote(md5))
        urls = []
        for channel in channels:
            urls.append('url=/data/%s/%s/%s/ImageProperties.xml' % (year, md5, channel))
        return (thumbnail, '&'.join(urls))
    except Exception:
        # was a bare "except:"; narrowed so SystemExit/KeyboardInterrupt propagate
        et, ev, tb = sys.exc_info()
        self.logger.error('got unexpected exception "%s"' % str(ev))
        self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
        self.sendMail('FAILURE Tiles: write thumbnail ERROR',
                      '%s\n' % str(traceback.format_exception(et, ev, tb)))
        os.remove(filename)
        return (None, None)
def _uploadAsset(self, file_path, asset_mapping, match_groupdict, callback=None):
    """Upload one file to hatrac and create/update its catalog record."""
    # 1. Populate metadata by querying the catalog
    self._queryFileMetadata(file_path, asset_mapping, match_groupdict)
    # 2. If "create_record_before_upload" specified in asset_mapping, check for an existing record, creating a new
    # one if necessary. Otherwise delay this logic until after the file upload.
    record = None
    if stob(asset_mapping.get("create_record_before_upload", False)):
        record = self._getFileRecord(asset_mapping)
    # 3. Perform the Hatrac upload
    self._getFileHatracMetadata(asset_mapping)
    hatrac_options = asset_mapping.get("hatrac_options", {})
    versioned_uri = \
        self._hatracUpload(self.metadata["URI"],
                           file_path,
                           md5=self.metadata.get("md5_base64"),
                           sha256=self.metadata.get("sha256_base64"),
                           content_type=self.guessContentType(file_path),
                           content_disposition=self.metadata.get("content-disposition"),
                           chunked=True,
                           create_parents=stob(hatrac_options.get("create_parents", True)),
                           allow_versioning=stob(hatrac_options.get("allow_versioning", True)),
                           callback=callback)
    logging.debug("Hatrac upload successful. Result object URI: %s" % versioned_uri)
    if stob(hatrac_options.get("versioned_uris", True)):
        self.metadata["URI"] = versioned_uri
    else:
        # NOTE(review): rsplit(":")[0] assumes the only ':' separates the
        # version qualifier; rsplit(":", 1)[0] would be safer if the base URI
        # can itself contain ':' — confirm
        self.metadata["URI"] = versioned_uri.rsplit(":")[0]
    self.metadata["URI_urlencoded"] = urlquote(self.metadata["URI"])
    # 4. Check for an existing record and create a new one if necessary
    if not record:
        record = self._getFileRecord(asset_mapping)
    # 5. Update an existing record, if necessary
    column_map = asset_mapping.get("column_map", {})
    updated_record = self.interpolateDict(self.metadata, column_map)
    if updated_record != record:
        logging.info("Updating catalog for file [%s]" % self.getFileDisplayName(file_path))
        self._catalogRecordUpdate(self.metadata['target_table'], record, updated_record)
def provision(self):
    """Idempotently create the CFDE schema elements in the target catalog."""
    if 'CFDE' not in self.cat_model_root.schemas:
        # blindly load the whole model on an apparently empty catalog
        self.catalog.post('/schema', json=self.model_doc).raise_for_status()
    else:
        # do some naively idempotent model definitions on existing catalog
        # adding missing tables and missing columns
        need_tables = []
        need_columns = []
        hazard_fkeys = {}  # NOTE(review): never populated or read below
        for ntable in self.doc_cfde_schema.tables.values():
            table = self.cat_cfde_schema.tables.get(ntable.name)
            if table is not None:
                for ncolumn in ntable.column_definitions:
                    column = table.column_definitions.elements.get(
                        ncolumn.name)
                    if column is not None:
                        # TODO: check existing columns for compatibility?
                        pass
                    else:
                        # new columns must be nullok on an existing table
                        cdoc = ncolumn.prejson()
                        cdoc.update({
                            'table_name': table.name,
                            'nullok': True
                        })
                        need_columns.append(cdoc)
                # TODO: check existing table keys/foreign keys for compatibility?
            else:
                tdoc = ntable.prejson()
                tdoc['schema_name'] = 'CFDE'
                need_tables.append(tdoc)
        if need_tables:
            logger.debug("Added tables %s" % ([tdoc['table_name'] for tdoc in need_tables]))
            self.catalog.post('/schema', json=need_tables).raise_for_status()
        for cdoc in need_columns:
            self.catalog.post('/schema/CFDE/table/%s/column' % urlquote(cdoc['table_name']),
                              json=cdoc).raise_for_status()
            logger.debug("Added column %s.%s" % (cdoc['table_name'], cdoc['name']))
    # refresh cached model and build the derived tables
    self.get_model()
    self.provision_dataset_ancestor_tables()
    self.provision_denorm_tables()
def metadata_query_url(image_id):
    """Build ERMrest query URL returning metadata record needed by class."""
    # base image plus two levels of alignment-standard joins, each kept as
    # a left join so images without a standard still resolve
    segments = [
        '/attributegroup',
        '/I:=Zebrafish:Image/ID=%(id)s;RID=%(id)s',
        '/AS1:=left(I:Alignment%%20Standard)=(Zebrafish:Alignment%%20Standard:RID)',
        '/ASI1:=left(AS1:Image)=(Zebrafish:Image:RID)',
        '/AS2:=left(ASI1:Alignment%%20Standard)=(Zebrafish:Alignment%%20Standard:RID)',
        '/ASI2:=left(AS2:Image)=(Zebrafish:Image:RID)',
        '/$I',
        '/*',
        ';ASI1_obj:=array(ASI1:*)',
        ',AS1_obj:=array(AS1:*)',
        ',ASI2_obj:=array(ASI2:*)',
        ',AS2_obj:=array(AS2:*)',
    ]
    return ''.join(segments) % {'id': urlquote(image_id)}
def deleteFromYouTube(self):
    """Delete pending videos from YouTube and mark them done in Delete_Youtube."""
    # select rows not yet processed: Processing_Status "in progress" or NULL
    url = '/entity/Common:Delete_Youtube/Youtube_Deleted=FALSE/Processing_Status=in%20progress;Processing_Status::null::'
    resp = self.catalog.get(url)
    resp.raise_for_status()
    files = resp.json()
    fileids = []
    for f in files:
        fileids.append((f['YouTube_URI'], f['RID']))
    self.logger.debug('Deleting from YouTube %d videos(s).' % (len(fileids)))
    for youtube_uri, rid in fileids:
        try:
            youtube_deleted = self.youtube_delete(youtube_uri)
            if youtube_deleted == True:
                self.logger.debug(
                    'SUCCEEDED deleted from YouTube the video with the URL: "%s".'
                    % (youtube_uri))
                # mark the row deleted/successful via an attributegroup update
                columns = ["Youtube_Deleted", "Processing_Status"]
                columns = ','.join([urlquote(col) for col in columns])
                url = '/attributegroup/Common:Delete_Youtube/RID;%s' % (
                    columns)
                obj = {
                    'RID': rid,
                    'Youtube_Deleted': True,
                    'Processing_Status': 'success'
                }
                self.catalog.put(url, json=[obj])
                self.logger.debug(
                    'SUCCEEDED updated the Common:Delete_Youtube table entry for the YouTube URL: "%s".'
                    % (youtube_uri))
            else:
                # deletion API reported failure: notify and record the failure
                self.logger.debug(
                    'Failure in deleting from YouTube the video with the URL: "%s".'
                    % (youtube_uri))
                self.sendMail(
                    'FAILURE Delete YouTube: YouTube Failure',
                    'The video "%s" could not be deleted from Youtube.'
                    % youtube_uri)
                self.reportFailure(rid, 'YouTube Failure')
        except Exception as e:
            # per-video failure: log, record, continue with remaining videos
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.reportFailure(rid, str(e))
def updateAttributes(self, path, rid, columns, row):
    """
    Update the ermrest attributes with the row values.

    :param path: schema:table path fragment of the target table
    :param rid: RID of the updated row (logging only; the RID key comes from `row`)
    :param columns: column names to update
    :param row: dict carrying the RID key and new column values
    """
    try:
        # keep the caller's `columns` list intact (was clobbered by the join)
        quoted_columns = ','.join([urlquote(col) for col in columns])
        url = '/attributegroup/%s/RID;%s' % (path, quoted_columns)
        resp = self.catalog.put(url, json=[row])
        resp.raise_for_status()
        self.logger.debug('SUCCEEDED updated the table "%s" for the RID "%s" with "%s".'
                          % (path, rid, json.dumps(row, indent=4)))
    except Exception:
        # was a bare "except:"; narrowed so SystemExit/KeyboardInterrupt propagate
        et, ev, tb = sys.exc_info()
        self.logger.error('got exception "%s"' % str(ev))
        self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
        self.sendMail('FAILURE Tiles: reportFailure ERROR',
                      '%s\n' % str(traceback.format_exception(et, ev, tb)))
def provision(self):
    """Idempotently create the CFDE schema elements in the target catalog."""
    if 'CFDE' not in self.model_root.schemas:
        # blindly load the whole model on an apparently empty catalog
        self.catalog.post('/schema', json=self.model_doc).raise_for_status()
    else:
        # do some naively idempotent model definitions on existing catalog
        # adding missing tables and missing columns
        need_tables = []
        need_columns = []
        hazard_fkeys = {}  # NOTE(review): never populated or read below
        for tname, tdoc in self.model_doc['schemas']['CFDE'][
                'tables'].items():
            if tname in self.cfde_schema.tables:
                table = self.cfde_schema.tables[tname]
                for cdoc in tdoc['column_definitions']:
                    if cdoc['name'] in table.column_definitions.elements:
                        column = table.column_definitions.elements[
                            cdoc['name']]
                        # TODO: check existing columns for compatibility?
                    else:
                        # new columns must be nullok on an existing table
                        cdoc.update({'table_name': tname, 'nullok': True})
                        need_columns.append(cdoc)
                # TODO: check existing table keys/foreign keys for compatibility?
            else:
                tdoc['schema_name'] = 'CFDE'
                need_tables.append(tdoc)
        if need_tables:
            if self.verbose:
                print("Added tables %s" % ([tdoc['table_name'] for tdoc in need_tables]))
            self.catalog.post('/schema', json=need_tables).raise_for_status()
        for cdoc in need_columns:
            self.catalog.post('/schema/CFDE/table/%s/column' % urlquote(cdoc['table_name']),
                              json=cdoc).raise_for_status()
            if self.verbose:
                print("Added column %s.%s" % (cdoc['table_name'], cdoc['name']))
    # refresh the cached model to reflect any additions
    self.get_model()
def updateAttributes(self, path, rid, columns, row):
    """
    Update the ermrest attributes with the row values.

    :param path: schema:table path fragment of the target table
    :param rid: RID of the updated row (logging only; the RID key comes from `row`)
    :param columns: column names to update
    :param row: dict carrying the RID key and new column values
    """
    try:
        # keep the caller's `columns` list intact (was clobbered by the join)
        quoted_columns = ','.join([urlquote(col) for col in columns])
        url = '/attributegroup/%s/RID;%s' % (path, quoted_columns)
        resp = self.catalog.put(url, json=[row])
        resp.raise_for_status()
        self.logger.debug(
            'SUCCEEDED updated the table "%s" for the RID "%s" with "%s".'
            % (path, rid, json.dumps(row, indent=4)))
    except Exception:
        # was a bare "except:"; narrowed so SystemExit/KeyboardInterrupt propagate
        et, ev, tb = sys.exc_info()
        self.logger.error('got exception "%s"' % str(ev))
        self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
        self.sendMail('FAILURE Tiles: reportFailure ERROR',
                      '%s\n' % str(traceback.format_exception(et, ev, tb)))
def load_data_files(self):
    """Load TSV data files into their catalog tables, batching each ingest."""
    tables_doc = self.model_doc['schemas']['CFDE']['tables']
    for tname in self.data_tnames_topo_sorted():
        # we are doing a clean load of data in fkey dependency order
        table = self.cat_model_root.table("CFDE", tname)
        resource = tables_doc[tname]["annotations"].get(
            self.resource_tag, {})
        if "path" in resource:
            fname = "%s/%s" % (self.dirname, resource["path"])
            with open(fname, "r") as f:
                # translate TSV to python dicts; first row is the header
                reader = csv.reader(f, delimiter="\t")
                row2dict = self.make_row2dict(table, next(reader))
                entity_url = "/entity/CFDE:%s" % urlquote(table.name)
                batch_size = 50000  # TODO: Should this be configurable?
                # Batch catalog ingests; too-large ingests will hang and fail
                # Largest known CFDE ingest has file with >5m rows
                batch = []
                for raw_row in reader:
                    # Collect full batch, then post at once
                    batch.append(row2dict(raw_row))
                    if len(batch) >= batch_size:
                        try:
                            self.catalog.post(entity_url, json=batch)
                            logger.debug("Batch of rows for %s loaded" % table.name)
                        except Exception as e:
                            logger.error("Table %s data load FAILED from "
                                         "%s: %s" % (table.name, fname, e))
                            raise
                        else:
                            batch.clear()
                # After reader exhausted, ingest final batch
                if len(batch) > 0:
                    try:
                        self.catalog.post(entity_url, json=batch)
                    except Exception as e:
                        logger.error("Table %s data load FAILED from "
                                     "%s: %s" % (table.name, fname, e))
                        raise
                logger.info("All data for table %s loaded from %s." % (table.name, fname))
def reportFailure(self, rid, error_message):
    """
    Update the Delete_Youtube table with the failure result.

    :param rid: RID of the Delete_Youtube row to update
    :param error_message: failure text stored as the Processing_Status
    """
    try:
        columns = ["Processing_Status"]
        columns = ','.join([urlquote(col) for col in columns])
        url = '/attributegroup/Common:Delete_Youtube/RID;%s' % (columns)
        obj = {'RID': rid, 'Processing_Status': '%s' % error_message}
        self.catalog.put(url, json=[obj])
        self.logger.debug(
            'SUCCEEDED updated the Delete_Youtube table for the RID "%s" with the Processing_Status result "%s".'
            % (rid, error_message))
    except Exception:
        # was a bare "except:"; narrowed so SystemExit/KeyboardInterrupt propagate
        et, ev, tb = sys.exc_info()
        self.logger.error('got exception "%s"' % str(ev))
        self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
        self.sendMail('FAILURE Delete YouTube: reportFailure ERROR',
                      '%s\n' % str(traceback.format_exception(et, ev, tb)))
def load_data_files(self):
    """Load TSV data files into their catalog tables in fkey dependency order."""
    tables_doc = self.model_doc['schemas']['CFDE']['tables']
    for tname in self.data_tnames_topo_sorted():
        # we are doing a clean load of data in fkey dependency order
        table = self.model_root.table("CFDE", tname)
        resource = tables_doc[tname]["annotations"].get(
            self.resource_tag, {})
        if "path" in resource:
            fname = "%s/%s" % (self.dirname, resource["path"])
            with open(fname, "r") as f:
                # translate TSV to python dicts, streaming rows off the reader
                # instead of materializing the raw file twice (was list(reader))
                reader = csv.reader(f, delimiter="\t")
                row2dict = self.make_row2dict(table, next(reader))
                dict_rows = [row2dict(row) for row in reader]
                self.catalog.post("/entity/CFDE:%s" % urlquote(table.name),
                                  json=dict_rows)
                if self.verbose:
                    print("Table %s data loaded from %s." % (table.name, fname))
def reportFailure(self, rid, error_message):
    """
    Update the Delete_Youtube table with the failure result.

    :param rid: RID of the Delete_Youtube row to update
    :param error_message: failure text stored as the Processing_Status
    """
    try:
        columns = ["Processing_Status"]
        columns = ','.join([urlquote(col) for col in columns])
        url = '/attributegroup/Common:Delete_Youtube/RID;%s' % (columns)
        obj = {'RID': rid, 'Processing_Status': '%s' % error_message}
        self.catalog.put(url, json=[obj])
        self.logger.debug(
            'SUCCEEDED updated the Delete_Youtube table for the RID "%s" with the Processing_Status result "%s".'
            % (rid, error_message))
    except Exception:
        # was a bare "except:"; narrowed so SystemExit/KeyboardInterrupt propagate
        et, ev, tb = sys.exc_info()
        self.logger.error('got exception "%s"' % str(ev))
        self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
        self.sendMail('FAILURE Delete YouTube: reportFailure ERROR',
                      '%s\n' % str(traceback.format_exception(et, ev, tb)))
def reportFailure(self, accessionId, error_message):
    """
    Update the Slide_Video table with the YouTube Upload failure result.

    :param accessionId: Accession_ID of the Slide_Video row to update
    :param error_message: failure text stored as the Processing_Status
    """
    try:
        columns = ["Processing_Status"]
        columns = ','.join([urlquote(col) for col in columns])
        url = '/attributegroup/Immunofluorescence:Slide_Video/Accession_ID;%s' % (columns)
        obj = {'Accession_ID': accessionId, "Processing_Status": '%s' % error_message}
        self.catalog.put(url, json=[obj])
        self.logger.debug(
            'SUCCEEDED updated the Slide_Video table for the video Accession_ID "%s" with the Processing_Status result "%s".'
            % (accessionId, error_message))
    except Exception:
        # was a bare "except:"; narrowed so SystemExit/KeyboardInterrupt propagate
        et, ev, tb = sys.exc_info()
        self.logger.error('got unexpected exception "%s"' % str(ev))
        self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
        self.sendMail('FAILURE YouTube Upload: reportFailure ERROR',
                      '%s\n' % str(traceback.format_exception(et, ev, tb)))
def metadata_query_url(study_id):
    """Build ERMrest query URL returning metadata record needed by class."""
    # NOTE(review): the projection list ends with a trailing ',' after the
    # final 'n2:=...' term — confirm ERMrest accepts this or whether another
    # projection was intended
    return ('/attributegroup/'
            'NPS:=%(nps)s/ID=%(sid)s;RID=%(sid)s/'
            'IPS:=(NPS:Study)/'
            'N1:=(NPS:%(n1)s)/'
            'N2:=(NPS:%(n2)s)/'
            'I1:=(N1:%(si)s)/'
            '$NPS/'
            '*;'
            'I1:%(zs)s,'
            'IPS:Alignment,'
            'n1:=N1:%(sfu)s,'
            'n2:=N2:%(sfu)s,') % {
                'sid': urlquote(study_id),
                'nps': urlquote('Nucleic Pair Study'),
                'n1': urlquote('Nucleic Region 1'),
                'n2': urlquote('Nucleic Region 2'),
                'si': urlquote('Source Image'),
                'zs': urlquote('ZYX Spacing'),
                'sfu': urlquote('Segments Filtered URL'),
            }
def reportFailure(self, accessionId, error_message):
    """
    Update the Slide_Video table with the YouTube Upload failure result.

    :param accessionId: Accession_ID of the Slide_Video row to update
    :param error_message: failure text stored as the Processing_Status
    """
    try:
        columns = ["Processing_Status"]
        columns = ','.join([urlquote(col) for col in columns])
        url = '/attributegroup/Immunofluorescence:Slide_Video/Accession_ID;%s' % (
            columns)
        obj = {
            'Accession_ID': accessionId,
            "Processing_Status": '%s' % error_message
        }
        self.catalog.put(url, json=[obj])
        self.logger.debug(
            'SUCCEEDED updated the Slide_Video table for the video Accession_ID "%s" with the Processing_Status result "%s".'
            % (accessionId, error_message))
    except Exception:
        # was a bare "except:"; narrowed so SystemExit/KeyboardInterrupt propagate
        et, ev, tb = sys.exc_info()
        self.logger.error('got unexpected exception "%s"' % str(ev))
        self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
        self.sendMail('FAILURE YouTube Upload: reportFailure ERROR',
                      '%s\n' % str(traceback.format_exception(et, ev, tb)))
def deleteFromYouTube(self):
    """Delete pending videos from YouTube and mark them done in Delete_Youtube."""
    # select rows not yet processed: Processing_Status "in progress" or NULL
    url = '/entity/Common:Delete_Youtube/Youtube_Deleted=FALSE/Processing_Status=in%20progress;Processing_Status::null::'
    resp = self.catalog.get(url)
    resp.raise_for_status()
    files = resp.json()
    fileids = []
    for f in files:
        fileids.append((f['YouTube_URI'], f['RID']))
    self.logger.debug('Deleting from YouTube %d videos(s).' % (len(fileids)))
    for youtube_uri, rid in fileids:
        try:
            youtube_deleted = self.youtube_delete(youtube_uri)
            if youtube_deleted == True:
                self.logger.debug('SUCCEEDED deleted from YouTube the video with the URL: "%s".' % (youtube_uri))
                # mark the row deleted/successful via an attributegroup update
                columns = ["Youtube_Deleted", "Processing_Status"]
                columns = ','.join([urlquote(col) for col in columns])
                url = '/attributegroup/Common:Delete_Youtube/RID;%s' % (columns)
                obj = {'RID': rid, 'Youtube_Deleted': True, 'Processing_Status': 'success'}
                self.catalog.put(url, json=[obj])
                self.logger.debug('SUCCEEDED updated the Common:Delete_Youtube table entry for the YouTube URL: "%s".' % (youtube_uri))
            else:
                # deletion API reported failure: notify and record the failure
                self.logger.debug('Failure in deleting from YouTube the video with the URL: "%s".' % (youtube_uri))
                self.sendMail('FAILURE Delete YouTube: YouTube Failure',
                              'The video "%s" could not be deleted from Youtube.' % youtube_uri)
                self.reportFailure(rid, 'YouTube Failure')
        except Exception as e:
            # per-video failure: log, record, continue with remaining videos
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.reportFailure(rid, str(e))
# accept an optional modification-timestamp lower bound
parser.add_option('-t', '--RMT', action='store', dest='RMT', type='string',
                  help='Modification Timestamp')
(options, args) = parser.parse_args()

# host name and credentials file are mandatory
if not options.server:
    print ('ERROR: Missing host name')
    sys.exit()
if not options.credentials:
    print ('ERROR: Missing credentials file')
    sys.exit()

# optional RMT >= filter fragment appended to the catalog query
if not options.RMT:
    RMT = ''
else:
    RMT = '&RMT::geq::%s' % (urlquote(options.RMT))

""" Get the non NULL "Thumbnail" values from the "Scan" table. """
servername = options.server
credentialsfilename = options.credentials
catalog = 1  # catalog number; rebound below to the ErmrestCatalog object
schema = 'Microscopy'
table = 'Scan'
column = 'Thumbnail'
prefix = '/var/www/html'
output = '%s_add_border.sh' % servername.split('.')[0]
credentials = json.load(open(credentialsfilename))
catalog = ErmrestCatalog('https', servername, catalog, credentials)
def _post_parser_init(self, args):
    """Shared initialization for all sub-commands.
    """
    self.host = args.host if args.host else 'localhost'
    self.resource = urlquote(args.resource, '/')
    # use the normalized self.host (was args.host, which may be None/empty and
    # would then disagree with the host used for the credential lookup)
    self.store = HatracStore(
        'https', self.host,
        DerivaHatracCLI._get_credential(self.host, args.token))
def uploadVideo(self):
    """Upload pending Slide_Video MP4s to YouTube and record the results.

    Processes every Slide_Video row with a named, non-empty MP4 and a
    Processing_Status of "in progress" or NULL; on re-uploads, queues the
    superseded YouTube video and hatrac MP4 for deletion.
    """
    # select candidate rows
    url = '/entity/Immunofluorescence:Slide_Video/!Identifier::null::&!Name::null::&!Bytes::null::&Media_Type=video%2Fmp4/Processing_Status=in%20progress;Processing_Status::null::'
    resp = self.catalog.get(url)
    resp.raise_for_status()
    videos = resp.json()
    videoids = []
    for video in videos:
        videoids.append(
            (video['Accession_ID'], video['Name'], video['Title'],
             video['Description'], video['Identifier'], video['MD5'],
             video['YouTube_MD5'], video['YouTube_URI'], video['RID'],
             video['Consortium'], video['MP4_URI'], video['RCT'],
             video['RMT']))
    self.logger.debug('Processing %d video(s).' % (len(videoids)))
    for accessionId, fileName, title, description, uri, md5, youtube_md5, youtube_uri, rid, consortium, mp4_uri, rct, rmt in videoids:
        if description == None:
            description = ''
        # map the consortium code to its public site name (used in the title)
        consortium_url = ''
        if consortium == 'GUD':
            consortium_url = 'gudmap.org'
        elif consortium == 'RBK':
            consortium_url = 'rebuildingakidney.org'
        f, MP4_URI = self.getVideoFile(fileName, uri, consortium_url, md5, accessionId)
        if f == None or MP4_URI == None:
            self.reportFailure(accessionId, 'error_no_video_file')
            continue
        if youtube_uri != None and youtube_md5 != md5:
            """ We have an update. Mark the video to be deleted from YouTube """
            url = '/entity/Common:Delete_Youtube?defaults=RID,RCT,RMT'
            obj = {
                'YouTube_MD5': youtube_md5,
                'YouTube_URI': youtube_uri,
                'Record_Type': 'Immunofluorescence:Slide_Video',
                'Record_RID': rid,
                'Youtube_Deleted': False
            }
            try:
                r = self.catalog.post(url, json=[obj])
                r.raise_for_status()
                self.logger.debug(
                    'SUCCEEDED updated the Common:Delete_Youtube table entry for the YouTube URL: "%s".'
                    % (youtube_uri))
            except:
                # best-effort bookkeeping: failure to queue the delete is
                # mailed but does not abort this upload
                et, ev, tb = sys.exc_info()
                self.logger.error(
                    '%s' % str(traceback.format_exception(et, ev, tb)))
                self.sendMail(
                    'FAILURE YouTube Upload: Delete_Youtube ERROR',
                    '%s\n' % str(traceback.format_exception(et, ev, tb)))
        if mp4_uri != None:
            """ We have an update. Insert the old video into the Delete_Hatrac table """
            # NOTE(review): the original indentation was ambiguous — confirm
            # this block belongs at loop level rather than nested inside the
            # YouTube-update branch above
            self.logger.debug(
                'Inserting the old MP4 video "%s" file into the Delete_Hatrac table.'
                % (fileName))
            url = '/entity/Common:Delete_Hatrac?defaults=RID,RCT,RMT'
            obj = {
                'Hatrac_MD5': mp4_uri.split('/')[-1],
                'Hatrac_URI': mp4_uri,
                'Hatrac_Deleted': False,
                'Record_Type': 'Immunofluorescence:Slide_Video',
                'Record_RID': rid,
                'Record_RCT': rct,
                'Record_RMT': rmt,
                'Record_Deleted': False
            }
            try:
                r = self.catalog.post(url, json=[obj])
                r.raise_for_status()
                self.logger.debug(
                    'SUCCEEDED updated the Common:Delete_Hatrac table entry for the Hatrac_URI: "%s".'
                    % (mp4_uri))
            except:
                et, ev, tb = sys.exc_info()
                self.logger.error(
                    '%s' % str(traceback.format_exception(et, ev, tb)))
                self.sendMail(
                    'FAILURE YouTube Upload: Delete_Hatrac ERROR',
                    '%s\n' % str(traceback.format_exception(et, ev, tb)))
        self.logger.debug('Uploading the video "%s" to YouTube' % (fileName))
        """ Get the video properties """
        cfg = self.getVideoProperties(f)
        if cfg != None:
            width, height = self.getVideoResolution(cfg)
            self.logger.debug('Video resolution: (%d x %d).' % (width, height))
        else:
            self.logger.debug('Could not get the video resolution.')
        """ Initialize YouTube video parameters """
        self.args.file = f
        # title is prefixed with the consortium site and capped at 64 chars
        self.args.title = ('%s:\n%s' % (consortium_url, title))[:64]
        self.args.description = description
        """ Upload video to YouTube """
        try:
            request = self.youtube_request()
            if request is not None:
                id = self.youtube_upload(request)
                returncode = 0
            else:
                returncode = 1
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail(
                'FAILURE YouTube Upload: YouTube ERROR',
                '%s\n' % str(traceback.format_exception(et, ev, tb)))
            returncode = 1
        if returncode != 0:
            # upload failed: clean up the local file and record the failure
            self.logger.error('Can not upload to YouTube the "%s" file.'
                              % (fileName))
            self.sendMail(
                'FAILURE YouTube Upload',
                'Can not upload to YouTube the "%s" file.'
                % (fileName))
            os.remove(f)
            """ Update the Slide_Video table with the failure result. """
            self.reportFailure(accessionId, 'error_youtube_upload')
            continue
        """ Upload the Slide_Video table with the SUCCESS status """
        columns = [
            "MP4_URI", "YouTube_MD5", "YouTube_URI", "Processing_Status"
        ]
        #youtube_uri = "https://www.youtube.com/embed/%s?showinfo=0&rel=0" % id
        youtube_uri = "https://www.youtube.com/embed/%s?rel=0" % id
        os.remove(f)
        columns = ','.join([urlquote(col) for col in columns])
        url = '/attributegroup/Immunofluorescence:Slide_Video/Accession_ID;%s' % (
            columns)
        obj = {
            'Accession_ID': accessionId,
            'MP4_URI': MP4_URI,
            'YouTube_URI': youtube_uri,
            'YouTube_MD5': md5,
            'Processing_Status': 'success'
        }
        try:
            r = self.catalog.put(url, json=[obj])
            r.raise_for_status()
            self.logger.debug(
                'SUCCEEDED updated the Immunofluorescence:Slide_Video table entry for the file: "%s".'
                % (fileName))
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail(
                'FAILURE YouTube Upload: Delete_Hatrac ERROR',
                '%s\n' % str(traceback.format_exception(et, ev, tb)))
        self.logger.debug(
            'SUCCEEDED updated the entry for the "%s" file.' % (fileName))
    self.logger.debug('Ended uploading videos to YouTube.')
def register_release(self, id, dcc_datapackages, description=None):
    """Idempotently register new release in registry, returning (release row, dcc_datapackages).

    :param id: The release.id for the new record
    :param dcc_datapackages: A dict mapping {dcc_id: datapackage, ...} for constituents
    :param description: A human-readable description of this release

    The constituents are a set of datapackage records (dicts) as
    returned by the get_datapackage() method. The dcc_id key MUST
    match the submitting_dcc of the record.

    For repeat calls on existing releases, the definition will be
    updated if the release is still in the planning state, but a
    StateError will be raised if it is no longer in planning state.

    :raises ValueError: if a dcc_datapackages key disagrees with its record's submitting_dcc
    :raises exception.StateError: if the release exists and is no longer in planning state
    """
    # Sanity-check that each mapping key agrees with the record's own DCC.
    for dcc_id, dp in dcc_datapackages.items():
        if dcc_id != dp['submitting_dcc']:
            raise ValueError(
                'Mismatch in dcc_datapackages DCC IDs %s != %s' % (dcc_id, dp['submitting_dcc']))

    try:
        rel, old_dcc_dps = self.get_release(id)
    except exception.ReleaseUnknown:
        # Create the new release record, letting the server fill all other columns.
        newrow = {
            'id': id,
            'status': terms.cfde_registry_rel_status.planning,
            # NOTE(review): the default for `description` is None, yet it is
            # compared against the `nochange` sentinel here -- confirm whether
            # the intended default was `nochange`.
            'description': None if description is nochange else description,
        }
        defaults = [
            cname
            for cname in self._builder.CFDE.release.column_definitions.keys()
            if cname not in newrow
        ]
        logger.info('Registering new release %s' % (id, ))
        self._catalog.post('/entity/CFDE:release?defaults=%s' % (','.join(defaults), ), json=[newrow])
        rel, old_dcc_dps = self.get_release(id)

    # Only releases still in planning may be (re-)registered.
    if rel['status'] != terms.cfde_registry_rel_status.planning:
        raise exception.StateError(
            'Idempotent registration disallowed on existing release %(id)s with status=%(status)s' % rel)

    # Prepare for idempotent updates.
    # BUG FIX: a dead assignment `old_dp_ids = {dp['id'] for dp in
    # old_dcc_dps.values()}` was removed here -- it was unconditionally
    # overwritten by the registry query below before any use.
    dp_ids = {dp['id'] for dp in dcc_datapackages.values()}
    datapackages = {dp['id']: dp for dp in dcc_datapackages.values()}

    # Idempotently revise description.
    if rel['description'] != description:
        logger.info('Updating release %s description: %s' % (id, description, ))
        self.update_release(id, description=description)

    # Find currently registered constituents.
    path = self._builder.CFDE.dcc_release_datapackage.path
    path = path.filter(path.dcc_release_datapackage.release == id)
    old_dp_ids = {row['datapackage'] for row in path.entities().fetch()}

    # Remove stale constituents.
    for dp_id in old_dp_ids.difference(dp_ids):
        logger.info('Removing constituent datapackage %s from release %s' % (dp_id, id))
        self._catalog.delete(
            '/entity/CFDE:dcc_release_datapackage/release=%s&datapackage=%s' % (
                urlquote(id), urlquote(dp_id), ))

    # Add new constituents.
    new_dp_ids = dp_ids.difference(old_dp_ids)
    if new_dp_ids:
        logger.info('Adding constituent datapackages %s to release %s' % (new_dp_ids, id))
        self._catalog.post('/entity/CFDE:dcc_release_datapackage', json=[
            {
                'dcc': datapackages[dp_id]['submitting_dcc'],
                'release': id,
                'datapackage': dp_id,
            }
            for dp_id in new_dp_ids
        ])

    # Return registry content.
    return self.get_release(id)
def uploadVideo(self):
    """Upload pending MP4 slide videos to YouTube and record the outcome.

    Queries Immunofluorescence:Slide_Video for rows that have a named,
    non-empty MP4 file and a Processing_Status of "in progress" or NULL.
    For each one: fetches the file from hatrac, queues any superseded
    YouTube/hatrac objects for deletion, uploads to YouTube, and then
    updates the Slide_Video row with either the success URLs or a
    failure status. Errors are logged and reported by email; processing
    continues with the next video.
    """
    url = '/entity/Immunofluorescence:Slide_Video/!Identifier::null::&!Name::null::&!Bytes::null::&Media_Type=video%2Fmp4/Processing_Status=in%20progress;Processing_Status::null::'
    resp = self.catalog.get(url)
    resp.raise_for_status()
    videos = resp.json()
    videoids = []
    for video in videos:
        videoids.append((video['Accession_ID'], video['Name'], video['Title'],
                         video['Description'], video['Identifier'], video['MD5'],
                         video['YouTube_MD5'], video['YouTube_URI'], video['RID'],
                         video['Consortium'], video['MP4_URI'], video['RCT'],
                         video['RMT']))
    self.logger.debug('Processing %d video(s).' % (len(videoids)))
    for accessionId, fileName, title, description, uri, md5, youtube_md5, youtube_uri, rid, consortium, mp4_uri, rct, rmt in videoids:
        if description is None:
            description = ''
        # Map the consortium code to the public site used in the video title.
        consortium_url = ''
        if consortium == 'GUD':
            consortium_url = 'gudmap.org'
        elif consortium == 'RBK':
            consortium_url = 'rebuildingakidney.org'
        # Fetch the MP4 from hatrac; skip this row on failure.
        f, MP4_URI = self.getVideoFile(fileName, uri, consortium_url, md5, accessionId)
        if f is None or MP4_URI is None:
            self.reportFailure(accessionId, 'error_no_video_file')
            continue
        if youtube_uri is not None and youtube_md5 != md5:
            # We have an update: mark the old video to be deleted from YouTube.
            url = '/entity/Common:Delete_Youtube?defaults=RID,RCT,RMT'
            obj = {'YouTube_MD5': youtube_md5,
                   'YouTube_URI': youtube_uri,
                   'Record_Type': 'Immunofluorescence:Slide_Video',
                   'Record_RID': rid,
                   'Youtube_Deleted': False
                   }
            try:
                r = self.catalog.post(url, json=[obj])
                r.raise_for_status()
                self.logger.debug('SUCCEEDED updated the Common:Delete_Youtube table entry for the YouTube URL: "%s".' % (youtube_uri))
            except Exception:
                et, ev, tb = sys.exc_info()
                self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                self.sendMail('FAILURE YouTube Upload: Delete_Youtube ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
        if mp4_uri is not None:
            # We have an update: queue the superseded MP4 object for hatrac deletion.
            self.logger.debug('Inserting the old MP4 video "%s" file into the Delete_Hatrac table.' % (fileName))
            url = '/entity/Common:Delete_Hatrac?defaults=RID,RCT,RMT'
            obj = {'Hatrac_MD5': mp4_uri.split('/')[-1],
                   'Hatrac_URI': mp4_uri,
                   'Hatrac_Deleted': False,
                   'Record_Type': 'Immunofluorescence:Slide_Video',
                   'Record_RID': rid,
                   'Record_RCT': rct,
                   'Record_RMT': rmt,
                   'Record_Deleted': False
                   }
            try:
                r = self.catalog.post(url, json=[obj])
                r.raise_for_status()
                self.logger.debug('SUCCEEDED updated the Common:Delete_Hatrac table entry for the Hatrac_URI: "%s".' % (mp4_uri))
            except Exception:
                et, ev, tb = sys.exc_info()
                self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                self.sendMail('FAILURE YouTube Upload: Delete_Hatrac ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
        self.logger.debug('Uploading the video "%s" to YouTube' % (fileName))
        # Probe the video resolution (informational only; upload proceeds either way).
        cfg = self.getVideoProperties(f)
        if cfg is not None:
            width, height = self.getVideoResolution(cfg)
            self.logger.debug('Video resolution: (%d x %d).' % (width, height))
        else:
            self.logger.debug('Could not get the video resolution.')
        # Initialize YouTube video parameters (title capped at 64 characters).
        self.args.file = f
        self.args.title = ('%s:\n%s' % (consortium_url, title))[:64]
        self.args.description = description
        # Upload the video to YouTube.
        try:
            request = self.youtube_request()
            if request is not None:
                youtube_id = self.youtube_upload(request)
                returncode = 0
            else:
                returncode = 1
        except Exception:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE YouTube Upload: YouTube ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
            returncode = 1
        if returncode != 0:
            self.logger.error('Can not upload to YouTube the "%s" file.' % (fileName))
            self.sendMail('FAILURE YouTube Upload', 'Can not upload to YouTube the "%s" file.' % (fileName))
            os.remove(f)
            # Update the Slide_Video table with the failure result.
            self.reportFailure(accessionId, 'error_youtube_upload')
            continue
        # Update the Slide_Video table with the SUCCESS status.
        columns = ["MP4_URI", "YouTube_MD5", "YouTube_URI", "Processing_Status"]
        youtube_uri = "https://www.youtube.com/embed/%s?rel=0" % youtube_id
        os.remove(f)
        columns = ','.join([urlquote(col) for col in columns])
        url = '/attributegroup/Immunofluorescence:Slide_Video/Accession_ID;%s' % (columns)
        obj = {'Accession_ID': accessionId,
               'MP4_URI': MP4_URI,
               'YouTube_URI': youtube_uri,
               'YouTube_MD5': md5,
               'Processing_Status': 'success'
               }
        try:
            r = self.catalog.put(url, json=[obj])
            r.raise_for_status()
            self.logger.debug('SUCCEEDED updated the Immunofluorescence:Slide_Video table entry for the file: "%s".' % (fileName))
        except Exception:
            et, ev, tb = sys.exc_info()
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            # BUG FIX: the subject previously said "Delete_Hatrac ERROR"
            # (copy/paste); this failure is the Slide_Video success update.
            self.sendMail('FAILURE YouTube Upload: Slide_Video ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
        self.logger.debug('SUCCEEDED updated the entry for the "%s" file.' % (fileName))
    self.logger.debug('Ended uploading videos to YouTube.')
def processHistologicalImages(self):
    """Generate DZI tiles, a thumbnail and metadata for each new HE slide.

    Polls Histological_Images:HE_Slide for rows that have file bytes but no
    Pyramid_URL yet and a Processing_Status of "in progress" or NULL, newest
    first.  For each row: downloads the file from hatrac, converts it to DZI
    tiles, writes a thumbnail, extracts bioformats metadata, updates the
    HE_Slide row with the result, and creates/updates the companion
    Histological_Images:HE_Image row with the extracted metadata.
    """
    # Query for detecting new slides - the most recent first.
    url = '/entity/Histological_Images:HE_Slide/!File_Bytes::null::&Pyramid_URL::null::/Processing_Status=in%%20progress;Processing_Status::null::@sort(%s::desc::)' % (urlquote('RCT'))
    resp = self.catalog.get(url)
    resp.raise_for_status()
    slides = resp.json()
    slideids = []
    for slide in slides:
        slideids.append((slide['ID'], slide['Filename'], slide['File_URL'], slide['RCT'], slide['File_MD5'], slide['Name'], slide['RID']))
    self.logger.debug('Processing %d HistologicalImages slides(s).' % (len(slideids)))
    for slideId, filename, file_url, creation_time, md5, name, rid in slideids:
        self.logger.debug('Generating pyramidal tiles for the file "%s"' % (filename))
        # Extract the file from hatrac; a failed download skips the row silently.
        f = self.getHatracFile(filename, file_url)
        if f == None:
            continue
        # Create the directory for the tiles, grouped by creation year and MD5.
        year = parse(creation_time).strftime("%Y")
        outdir = '%s/%s/%s' % (self.dzi, year, md5)
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        # Convert the file to DZI tiles.
        returncode = self.convert2dzi(f, outdir)
        if returncode != 0:
            # Update the slide table with the failure result (generic thumbnail).
            self.updateAttributes(
                'Histological_Images:HE_Slide', rid,
                ["Thumbnail", "Processing_Status"],
                {'RID': rid,
                 'Thumbnail': '/thumbnails/generic/generic_genetic.png',
                 'Processing_Status': 'czi2dzi error'})
            continue
        # Generate the thumbnail; urls is the viewer query string for the channels.
        thumbnail, urls = self.writeThumbnailImage(f, year, md5)
        if thumbnail == None:
            # Update the slide table with the failure result.
            self.updateAttributes(
                'Histological_Images:HE_Slide', rid,
                ["Thumbnail", "Processing_Status"],
                {'RID': rid,
                 'Thumbnail': '/thumbnails/generic/generic_genetic.png',
                 'Processing_Status': 'DZI failure'})
            continue
        # Extract the metadata via bioformats.
        self.logger.debug('Extracting metadata for filename "%s"' % (filename))
        bioformatsClient = BioformatsClient(showinf=self.showinf,
                                            czirules=self.czirules,
                                            cziFile=f,
                                            logger=self.logger)
        try:
            metadata = bioformatsClient.getMetadata()
            if metadata == None:
                metadata = {}
            self.logger.debug('Metadata: "%s"' % str(metadata))
            # NOTE(review): assumes getMetadata() left "temp.xml" in the current
            # working directory -- confirm against BioformatsClient.
            os.remove('temp.xml')
        except XMLSyntaxError:
            # Malformed bioformats XML: report it and continue with empty metadata.
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail(
                'FAILURE Tiles: XMLSyntaxError',
                '%s\n' % str(traceback.format_exception(et, ev, tb)))
            metadata = {}
        os.remove(f)
        # Update the slide table with the success result.
        self.updateAttributes(
            'Histological_Images:HE_Slide', rid,
            ["Thumbnail", "Pyramid_URL", "Processing_Status", "uri"],
            {'RID': rid,
             'Thumbnail': thumbnail,
             'Pyramid_URL': '/%s?%s' % (self.viewer, urls),
             'uri': '/%s?%s' % (self.viewer, urls),
             "Processing_Status": 'success'})
        self.logger.debug(
            'SUCCEEDED created the tiles directory for the file "%s".'
            % (filename))
        # Update/Create the image entry with the metadata; only whitelisted
        # metadata columns (self.metadata) with non-null values are copied.
        obj = {}
        obj['ID'] = slideId
        obj['Name'] = name
        obj['url'] = '/chaise/viewer/#2/Histological_Images:HE_Slide/ID=%d' % slideId
        columns = ['ID', 'Name', 'url']
        for col in self.metadata:
            if col in metadata and metadata[col] != None:
                columns.append(col)
                obj[col] = metadata[col]
        # Check if we have an update or create.
        # NOTE: `rid` is rebound here from the HE_Slide RID to the HE_Image RID.
        rid = self.getRID('Histological_Images:HE_Image', 'ID=%d' % slideId)
        if rid != None:
            obj['RID'] = rid
            self.updateAttributes('Histological_Images:HE_Image', rid, columns, obj)
        else:
            self.createEntity('Histological_Images:HE_Image', obj)
        self.logger.debug(
            'SUCCEEDED created the image entry for the file "%s".' % (filename))
    self.logger.debug('Ended HistologicalImages Slides Processing.')
help='Modification Timestamp') (options, args) = parser.parse_args() if not options.server: print('ERROR: Missing host name') sys.exit() if not options.credentials: print('ERROR: Missing credentials file') sys.exit() if not options.RMT: RMT = '' else: RMT = '&RMT::geq::%s' % (urlquote(options.RMT)) """ Get the non NULL "Thumbnail" values from the "Scan" table. """ servername = options.server credentialsfilename = options.credentials catalog = 1 schema = 'Microscopy' table = 'Scan' column = 'Thumbnail' prefix = '/var/www/html' output = '%s_add_border.sh' % servername.split('.')[0] credentials = json.load(open(credentialsfilename)) catalog = ErmrestCatalog('https', servername, catalog, credentials)
# Column names used in the Microscopy:Scan query below.
table = 'Scan'
acquisition = 'Acquisition Date'
czi = 'HTTP URL'
rid = 'RID'
rct = 'RCT'
filename = 'filename'
# Open the catalog and hatrac store with the stored credentials.
# (with-block closes the credentials file; the original leaked the handle)
with open(credentialsfilename) as credfile:
    credentials = json.load(credfile)
catalog = ErmrestCatalog('https', servername, catalog, credentials)
hatrac_store = HatracStore(
    'https', servername,
    {'cookie': credentials['cookie']}
)
# Select the rows whose acquisition date is NULL, projecting the columns we need.
url = '/attribute/%s:%s/%s::null::/%s,%s,%s,%s' % (urlquote(schema), urlquote(table), urlquote(acquisition), urlquote(rid), urlquote(rct), urlquote(filename), urlquote(czi))
# BUG FIX: Python 2 print statements converted to print() calls so this
# script is valid under Python 3 (and unchanged under Python 2).
print('Query URL: "%s"' % url)
resp = catalog.get(url)
resp.raise_for_status()
rows = resp.json()
entities = []
for row in rows:
    if options.skip:
        # With --skip, fall back to the record-creation timestamp date prefix.
        acquisitionDate = row[rct][:10]
    else:
        acquisitionDate = getAcquisitionDate(row)
    entities.append({rid: row[rid], acquisition: acquisitionDate})
print('Total rows to be updated: %d' % len(entities))
def processHistologicalImages(self):
    """Generate DZI tiles, a thumbnail and metadata for each new HE slide.

    Polls Histological_Images:HE_Slide for rows that have file bytes but no
    Pyramid_URL yet and a Processing_Status of "in progress" or NULL, newest
    first.  For each row: downloads the file from hatrac, converts it to DZI
    tiles, writes a thumbnail, extracts bioformats metadata, updates the
    HE_Slide row with the result, and creates/updates the companion
    Histological_Images:HE_Image row with the extracted metadata.
    """
    # Query for detecting new slides - the most recent first.
    url = '/entity/Histological_Images:HE_Slide/!File_Bytes::null::&Pyramid_URL::null::/Processing_Status=in%%20progress;Processing_Status::null::@sort(%s::desc::)' % (urlquote('RCT'))
    resp = self.catalog.get(url)
    resp.raise_for_status()
    slides = resp.json()
    slideids = []
    for slide in slides:
        slideids.append((slide['ID'], slide['Filename'], slide['File_URL'], slide['RCT'], slide['File_MD5'], slide['Name'], slide['RID']))
    self.logger.debug('Processing %d HistologicalImages slides(s).' % (len(slideids)))
    for slideId, filename, file_url, creation_time, md5, name, rid in slideids:
        self.logger.debug('Generating pyramidal tiles for the file "%s"' % (filename))
        # Extract the file from hatrac; a failed download skips the row silently.
        f = self.getHatracFile(filename, file_url)
        if f == None:
            continue
        # Create the directory for the tiles, grouped by creation year and MD5.
        year = parse(creation_time).strftime("%Y")
        outdir = '%s/%s/%s' % (self.dzi, year, md5)
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        # Convert the file to DZI tiles.
        returncode = self.convert2dzi(f, outdir)
        if returncode != 0:
            # Update the slide table with the failure result (generic thumbnail).
            self.updateAttributes(
                'Histological_Images:HE_Slide', rid,
                ["Thumbnail", "Processing_Status"],
                {'RID': rid,
                 'Thumbnail': '/thumbnails/generic/generic_genetic.png',
                 'Processing_Status': 'czi2dzi error'})
            continue
        # Generate the thumbnail; urls is the viewer query string for the channels.
        thumbnail, urls = self.writeThumbnailImage(f, year, md5)
        if thumbnail == None:
            # Update the slide table with the failure result.
            self.updateAttributes(
                'Histological_Images:HE_Slide', rid,
                ["Thumbnail", "Processing_Status"],
                {'RID': rid,
                 'Thumbnail': '/thumbnails/generic/generic_genetic.png',
                 'Processing_Status': 'DZI failure'})
            continue
        # Extract the metadata via bioformats.
        self.logger.debug('Extracting metadata for filename "%s"' % (filename))
        bioformatsClient = BioformatsClient(showinf=self.showinf,
                                            czirules=self.czirules,
                                            cziFile=f,
                                            logger=self.logger)
        try:
            metadata = bioformatsClient.getMetadata()
            if metadata == None:
                metadata = {}
            self.logger.debug('Metadata: "%s"' % str(metadata))
            # NOTE(review): assumes getMetadata() left "temp.xml" in the current
            # working directory -- confirm against BioformatsClient.
            os.remove('temp.xml')
        except XMLSyntaxError:
            # Malformed bioformats XML: report it and continue with empty metadata.
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: XMLSyntaxError', '%s\n' % str(traceback.format_exception(et, ev, tb)))
            metadata = {}
        os.remove(f)
        # Update the slide table with the success result.
        self.updateAttributes(
            'Histological_Images:HE_Slide', rid,
            ["Thumbnail", "Pyramid_URL", "Processing_Status", "uri"],
            {'RID': rid,
             'Thumbnail': thumbnail,
             'Pyramid_URL': '/%s?%s' % (self.viewer, urls),
             'uri': '/%s?%s' % (self.viewer, urls),
             "Processing_Status": 'success'})
        self.logger.debug('SUCCEEDED created the tiles directory for the file "%s".'
                          % (filename))
        # Update/Create the image entry with the metadata; only whitelisted
        # metadata columns (self.metadata) with non-null values are copied.
        obj = {}
        obj['ID'] = slideId
        obj['Name'] = name
        obj['url'] = '/chaise/viewer/#2/Histological_Images:HE_Slide/ID=%d' % slideId
        columns = ['ID', 'Name', 'url']
        for col in self.metadata:
            if col in metadata and metadata[col] != None:
                columns.append(col)
                obj[col] = metadata[col]
        # Check if we have an update or create.
        # NOTE: `rid` is rebound here from the HE_Slide RID to the HE_Image RID.
        rid = self.getRID('Histological_Images:HE_Image', 'ID=%d' % slideId)
        if rid != None:
            obj['RID'] = rid
            self.updateAttributes('Histological_Images:HE_Image', rid, columns, obj)
        else:
            self.createEntity('Histological_Images:HE_Image', obj)
        self.logger.debug('SUCCEEDED created the image entry for the file "%s".' % (filename))
    self.logger.debug('Ended HistologicalImages Slides Processing.')
def __init__(self, *args, **kwargs):
    """Configure a full-catalog backup download.

    Builds the download configuration (schema dump, bag packaging, and one
    data-dump query processor per eligible table) from the live catalog model.

    Recognized keyword options: config_file, no_schema, no_bag, bag_archiver,
    no_data, exclude_data (collection of schema or "schema:table" names).

    :raises DerivaBackupAuthorizationError: if the caller does not own the catalog.
    """
    DerivaDownload.__init__(self, *args, **kwargs)
    self.config_file = kwargs.get("config_file")
    self.annotation_config = None
    if not self.config:
        self.config = copy.deepcopy(self.BASE_CONFIG)
        no_schema = kwargs.get("no_schema", False)
        if not no_schema:
            self.config["catalog"]["query_processors"].append(self.BASE_SCHEMA_QUERY_PROC)
        no_bag = kwargs.get("no_bag", False)
        if not no_bag:
            bag = dict()
            bag["bag_name"] = os.path.basename(self.output_dir)
            bag["bag_archiver"] = kwargs.get("bag_archiver", "tgz")
            bag["bag_algorithms"] = ["sha256", "md5"]
            self.config["bag"] = bag
    # if credentials have not been explicitly set yet, try to get them from the default credential store
    if not self.credentials:
        self.set_credentials(get_credential(self.hostname))
    logging.debug("Inspecting catalog model...")
    model = self.catalog.getCatalogModel()
    # if we dont have catalog ownership rights, its a hard error for now
    if not model.acls:
        raise DerivaBackupAuthorizationError("Only catalog owners may perform full catalog dumps.")
    if kwargs.get("no_data", False):
        return
    exclude = kwargs.get("exclude_data", list())
    for sname, schema in model.schemas.items():
        if sname in exclude:
            logging.info("Excluding data dump from all tables in schema: %s" % sname)
            continue
        for tname, table in schema.tables.items():
            fqtname = "%s:%s" % (sname, tname)
            if table.kind != "table":
                logging.warning("Skipping data dump of %s: %s" % (table.kind, fqtname))
                continue
            if fqtname in exclude:
                logging.info("Excluding data dump from table: %s" % fqtname)
                continue
            if "RID" not in table.column_definitions.elements:
                logging.warning(
                    "Source table %s.%s lacks system-columns and will not be dumped."
                    % (sname, tname))
                # BUG FIX: previously missing, so the loop fell through and
                # dumped the table anyway, contradicting the warning above.
                continue
            # Configure table data download query processors; the small
            # public client/group tables are dumped as plain json, everything
            # else as json-stream.
            data_format = "json" if (sname, tname) in {
                ('public', 'ERMrest_Client'),
                ('public', 'ERMrest_Group'),
            } else "json-stream"
            q_sname = urlquote(sname)
            q_tname = urlquote(tname)
            output_path = self.BASE_DATA_OUTPUT_PATH.format(q_sname, q_tname)
            query_path = self.BASE_DATA_QUERY_PATH.format(q_sname, q_tname)
            query_proc = dict()
            query_proc["processor"] = data_format
            query_proc["processor_params"] = {"query_path": query_path, "output_path": output_path}
            self.config["catalog"]["query_processors"].append(query_proc)
    self.generate_asset_configs()
def metadata_query_url(study_id):
    """Build ERMrest query URL returning metadata record needed by class."""
    # URL-encoded substitutions shared by the path and projection templates.
    subs = {
        'sid': urlquote(study_id),
        'sps': urlquote('Synaptic Pair Study'),
        'ips': urlquote('Image Pair Study'),
        'sfu': urlquote('Segments Filtered URL'),
        's1': urlquote('Synaptic Region 1'),
        's2': urlquote('Synaptic Region 2'),
        'n1': urlquote('Nucleic Region 1'),
        'n2': urlquote('Nucleic Region 2'),
        'si': urlquote('Source Image'),
        'zs': urlquote('ZYX Spacing'),
        'r1u': urlquote('Region 1 URL'),
        'r2u': urlquote('Region 2 URL'),
        'slice': urlquote('ZYX Slice'),
        'nu': urlquote('Npz URL'),
    }
    # Entity path: the study row (matched by ID or RID), joined to its image
    # pair study and per-region tables, then reset to the study context.
    path_template = (
        '/attributegroup/'
        'SPS:=%(sps)s/ID=%(sid)s;RID=%(sid)s/'
        'IPS:=%(ips)s/'
        'S1:=(SPS:%(s1)s)/'
        'S2:=(SPS:%(s2)s)/'
        'N1:=(IPS:%(n1)s)/'
        'N2:=(IPS:%(n2)s)/'
        'I1:=(N1:%(si)s)/'
        '$SPS/'
    )
    # Projection: all study columns plus aliased per-region URL/box columns.
    projection_template = (
        '*;'
        'I1:%(zs)s,'
        'IPS:Alignment,'
        'n1:=IPS:%(r1u)s,'
        'n2:=IPS:%(r2u)s,'
        's1:=SPS:%(r1u)s,'
        's2:=SPS:%(r2u)s,'
        's1raw:=S1:%(sfu)s,'
        's2raw:=S2:%(sfu)s,'
        's1box:=S1:%(slice)s,'
        's2box:=S2:%(slice)s,'
        's1n:=S1:%(nu)s,'
        's2n:=S2:%(nu)s'
    )
    return (path_template + projection_template) % subs