def initRecord(self, resID):
    """Load resource ``resID`` and its OAI-PMH content and prepare this map.

    Fetches the resource description from ``<targetURI>/resource/<resID>``,
    builds the logger, selects the mapper configured in ``maps`` for the
    resource's source/set namespaces, then fetches
    ``<targetURI>/content/<resID>`` and extracts the OAI-PMH ``GetRecord``
    record from it.

    Args:
        resID: Identifier appended to the ``/resource/`` and ``/content/``
            URLs.

    Returns:
        The ``record`` element of the parsed OAI-PMH response.

    Raises:
        Exception: The content could not be converted from XML to a dict.
        BadOAIRecord: The response has no record or no header, or it has no
            metadata and its header does not carry the status ``deleted``.
    """
    resURL = str(self.config.targetURI) + "/resource/" + str(resID)
    resourceJSON = Utils.getContent(resURL)
    self.resource = Utils.jsonToDict(resourceJSON)
    self.logger = gl.gleanomaticLogger(
        self.resource['sourceNamespace'],
        self.resource['setNamespace'],
        'OAIMap')
    self.logger.info("Initializing record in OAIMap.")

    mapConfig = maps[self.resource["sourceNamespace"]][
        self.resource["setNamespace"]]
    if 'prefix' in mapConfig:
        self.prefix = mapConfig['prefix']
    self.mapper = self.getMapper(mapConfig["mapper"])

    url = str(self.config.targetURI) + "/content/" + str(resID)
    content = Utils.getContent(url)
    try:
        data = Utils.getDictFromXML(content)
    except Exception as e:
        self.logger.critical("Could not get dict from xml. ERROR: " + str(e))
        # Same exception type as before; chained so the original traceback
        # is not lost.
        raise Exception(str(e)) from e

    # Look the record up on its own: previously a KeyError here left
    # ``record`` unbound and the fallback below died with an
    # UnboundLocalError instead of reporting a bad record.
    try:
        record = data["OAI-PMH"]["GetRecord"]["record"]
    except KeyError as e:
        self.logger.critical("Could not find record in OAI-PMH response.")
        raise BadOAIRecord(str(e)) from e

    try:
        self.header = record["header"]
    except KeyError as e:
        self.logger.critical("Could not find metadata or header in record.")
        raise BadOAIRecord(str(e)) from e

    try:
        self.metadata = record["metadata"]
    except KeyError:
        # A record without metadata is only accepted when its header marks
        # it as deleted; the status is looked up under "status" and then
        # "@status".
        try:
            status = self.header["status"]
        except KeyError:
            try:
                status = self.header["@status"]
            except KeyError as e:
                self.logger.critical("No status in header.")
                raise BadOAIRecord(str(e)) from e
        if status == 'deleted':
            self.deleted = True
        else:
            self.logger.critical("Unknown status type: " + str(status))
            raise BadOAIRecord("No metadata. Unknown status: " + str(status))

    # NOTE(review): self.deleted is only assigned above on the deleted path;
    # presumably it is initialised elsewhere (e.g. the constructor) - confirm.
    if not self.deleted:
        mbr = MOHUBBaseRecord()
        self.resultRecord = mbr.getBaseRecord()
    return record
def pullDynamicOAIByURL(self, url, maxRetries=3):
    """Page through an OAI ``ListIdentifiers`` response starting at ``url``.

    For every page, each ``<identifier>`` value is turned into a
    ``GetRecord`` URL against ``self.OAISource`` and the page's URLs are
    handed to ``self.addBatch``. Paging continues while
    ``self.getResumptionToken`` returns a token.

    Args:
        url: URL of the first page to fetch.
        maxRetries: How many times a page fetch may be retried after a
            failure before giving up. Previously a failing URL was retried
            forever.

    Raises:
        RSLoaderError: The response contains an OAI error, or a page could
            not be fetched after ``maxRetries`` retries.
    """
    failures = 0
    while url:
        self.logger.info("Pulling dynamic OAI from " + str(url))
        try:
            data = Utils.getContent(url)
        except Exception as e:
            failures += 1
            self.logger.warning("Could not get content from " + str(url) +
                                " ERROR: " + str(e))
            if failures > maxRetries:
                # Give up instead of spinning on a URL that keeps failing.
                raise RSLoaderError(
                    "Could not get content from " + str(url) + " after " +
                    str(failures) + " attempts. ERROR: " + str(e),
                    None, self.logger)
            continue
        failures = 0  # the retry budget applies per page

        OAIerror = self.getError(data)
        if OAIerror:
            raise RSLoaderError(
                "Could not pull OAI records. OAIError: " + str(OAIerror),
                None, self.logger)

        rawIDs = data.split('<identifier>')
        # first item is the header
        del rawIDs[0]
        records = []
        for rawID in rawIDs:
            parts = rawID.split('</identifier>')
            resourceURL = (str(self.OAISource) +
                           "?verb=GetRecord&metadataPrefix=" +
                           str(self.OAIMetaDataPrefix) +
                           "&identifier=" + str(parts[0]))
            records.append(resourceURL)
        self.addBatch(records)

        rToken = self.getResumptionToken(data)
        if rToken:
            url = (str(self.OAISource) +
                   "?verb=ListIdentifiers&resumptionToken=" + str(rToken))
        else:
            url = None
def getManifest(self, batchTag, sourceNamespace, setNamespace):
    """Return the manifest content for one batch, or ``False``.

    The manifest URL is
    ``<endpointURI>/static/<sourceNamespace>/<setNamespace>/<batchTag>/manifest``.
    ``False`` is returned when ``Utils.checkURI`` rejects that URL; otherwise
    the result of ``Utils.getContent`` for it is returned.
    """
    manifestPath = "/".join((
        "/static",
        str(sourceNamespace),
        str(setNamespace),
        str(batchTag),
        "manifest",
    ))
    manifestURL = self.endpointURI + manifestPath
    if Utils.checkURI(manifestURL):
        return Utils.getContent(manifestURL)
    return False
def getResources(self, offset=0, count=20):
    """Return one page of the endpoint's resource listing, or ``False``.

    Args:
        offset: Value sent as the ``offset`` query parameter.
        count: Value sent as the ``count`` query parameter.

    Returns:
        The result of ``Utils.getContent`` for the listing URL, or ``False``
        when ``Utils.checkURI`` rejects that URL.
    """
    # NOTE(review): no "/" is inserted before "resource"; presumably
    # self.endpointURI already ends with one - confirm.
    url = self.endpointURI + str("resource")
    url = str(url) + "?offset=" + str(offset) + "&count=" + str(count)
    if not Utils.checkURI(url):
        return False
    # The URL used to be opened a second time with urllib.request.urlopen
    # and the response was neither read nor closed; that leaked a connection
    # and doubled the request, so it has been dropped.
    return Utils.getContent(url)
def loadManifestIDs(self, sourceNamespace, setNamespace, batchTag):
    """Return the resource IDs listed in a batch manifest, or ``False``.

    The manifest is fetched from
    ``<endpointURI>/static/<sourceNamespace>/<setNamespace>/<batchTag>/manifest``.
    For each non-blank line, the text after the last ``><`` is taken and
    every ``/resource/`` and ``>`` is removed from it to form the ID.

    Returns:
        A list of ID strings in manifest order, or ``False`` when
        ``Utils.checkURI`` rejects the manifest URL.
    """
    url = self.endpointURI + "/static/" + str(sourceNamespace) + "/" + str(
        setNamespace) + "/" + str(batchTag) + "/manifest"
    if not Utils.checkURI(url):
        return False

    contents = Utils.getContent(url)
    ids = []
    for line in contents.split("\n"):
        # Blank lines (e.g. the one produced by a trailing newline) used to
        # end up in the result as empty IDs.
        if not line.strip():
            continue
        resourceID = line.split('><')[-1]
        resourceID = resourceID.replace('/resource/', '')
        resourceID = resourceID.replace('>', '')
        ids.append(resourceID)
    return ids
def initRecord(self):
    """Load the resource and content for ``self.resID`` and return the record.

    The resource description is read from ``<targetURI>/resource/<resID>``
    and stored on ``self.resource``; its namespaces are used to create the
    logger. The record itself is the JSON obtained from
    ``<targetURI>/content/<resID>``. Unless ``self.deleted`` is truthy, a
    fresh base record is stored on ``self.resultRecord``.

    Raises:
        gError: The content could not be fetched or decoded.
    """
    resourceURL = str(self.config.targetURI) + "/resource/" + str(self.resID)
    self.resource = Utils.jsonToDict(Utils.getContent(resourceURL))

    self.logger = gl.gleanomaticLogger(
        self.resource['sourceNamespace'],
        self.resource['setNamespace'],
        'MimsyMap',
    )
    self.logger.info("Initializing record in MimsyMap.")

    contentURL = str(self.config.targetURI) + "/content/" + str(self.resID)
    try:
        record = Utils.getJSONFromResponse(Utils.getResponse(contentURL))
    except Exception as err:
        raise gError("Could not get data from url", err, self.logger)

    if self.deleted:
        return record

    self.resultRecord = MOHUBBaseRecord().getBaseRecord()
    return record
def pullDynamicOAI(self):
    """Harvest identifiers from ``self.OAISource`` and batch GetRecord URLs.

    Starts with a ``ListIdentifiers`` request for ``self.OAIMetaDataPrefix``
    (restricted to ``self.OAIset`` when that is set). Each page's
    ``<identifier>`` values become ``GetRecord`` URLs that are passed to
    ``self.addBatch``; paging continues while ``self.getResumptionToken``
    yields a token.

    Raises:
        ValueError: ``self.getError`` reports an error for a fetched page.
    """
    url = "{}?verb=ListIdentifiers&metadataPrefix={}".format(
        str(self.OAISource), str(self.OAIMetaDataPrefix))
    if self.OAIset:
        url += "&set=" + str(self.OAIset)

    while url:
        logger.info("Pulling dynamic OAI from " + str(url))
        page = Utils.getContent(url)

        pageError = self.getError(page)
        if pageError:
            logger.critical(
                self.msg("Could not pull OAI records. Error: " +
                         str(pageError)))
            raise ValueError("Could not pull OAI records. ERROR: " +
                             str(pageError))

        # Everything before the first <identifier> tag is the header, so the
        # first chunk is discarded.
        chunks = page.split('<identifier>')[1:]
        getRecordBase = (str(self.OAISource) +
                         "?verb=GetRecord&metadataPrefix=" +
                         str(self.OAIMetaDataPrefix) + "&identifier=")
        self.addBatch([
            getRecordBase + str(chunk.split('</identifier>')[0])
            for chunk in chunks
        ])

        token = self.getResumptionToken(page)
        url = (str(self.OAISource) +
               "?verb=ListIdentifiers&resumptionToken=" + str(token)
               ) if token else None
localhost = "http://*****:*****@type' in res['rs:ln']: if str(res['rs:ln']['@type']).lower() == 'application/json': subResListURL = res['rs:ln']['@href'] subResListURL = subResListURL.replace(localhost, 'http://resourcesync/') #DEBUG print(subResListURL) subResContents = Utils.getContent(subResListURL) subResList = json.loads(subResContents) for url in subResList['urlset']['url']:
import gleanomatic.Utils as Utils
from gleanomatic.configure import appConfig

# Disabled example of posting a log entry:
#log = { "LEVEL": "WARN", "MSG": "This is a new message" }
#content = Utils.postToLog(log)

# Fetch the resource listing through Utils.getContent and print it.
RESOURCE_URL = "http://resourcesync/resource"
content = Utils.getContent(RESOURCE_URL)
print(content)
import gleanomatic.Utils as Utils

# Fetch the admin page through Utils.getContent and print it.
ADMIN_URL = "http://localhost:8080/admin"
content = Utils.getContent(ADMIN_URL)
print(content)