def writeRecord(req, node, metadataformat, mask=None):
    """Write a single OAI ``<record>`` element for ``node`` to ``req``.

    The whole record (header, metadata payload and closing tags) is assembled
    in memory and handed to ``req.write`` in one call.

    Parameters:
        req: request object; only ``req.write`` is used here (it is also
            passed on to ``initSetList`` and ``timetable_update``).
        node: node to export; ``node.id``, ``node.type`` and
            ``node.get(DATEFIELD)`` are read.
        metadataformat: requested metadata format; ``"mediatum"`` selects the
            native single-node XML.
        mask: export mask used for any other format. The mask is not looked
            up here; when it is falsy a ``<recordHasNoXMLRepresentation/>``
            placeholder is written instead.
    """
    if not SET_LIST:
        initSetList(req)

    # Nodes without an update time get the last second before EARLIEST_YEAR.
    raw_stamp = node.get(DATEFIELD)
    if raw_stamp:
        stamp = date.parse_date(raw_stamp)
    else:
        stamp = date.DateTime(EARLIEST_YEAR - 1, 12, 31, 23, 59, 59)
    datestamp = ISO8601(stamp)

    set_specs = getSetSpecsForNode(node)

    if DEBUG:
        timetable_update(
            req,
            " in writeRecord: getSetSpecsForNode: node: '%s, %s', metadataformat='%s' set_specs:%s"
            % (ustr(node.id), node.type, metadataformat, ustr(set_specs)))

    header = """ <record> <header><identifier>%s</identifier> <datestamp>%sZ</datestamp> %s </header> <metadata>""" % (
        mkIdentifier(node.id), datestamp, set_specs)

    if DEBUG:
        timetable_update(
            req,
            " in writeRecord: writing header: node.id='%s', metadataformat='%s'"
            % (ustr(node.id), metadataformat))

    if metadataformat == "mediatum":
        payload = core.xmlnode.getSingleNodeXML(node)
    elif mask:
        if DEBUG:
            timetable_update(
                req,
                """ in writeRecord: mask = getMetaType(node.getSchema()).getMask(u"oai_"+metadataformat.lower()): node.id='%s', metadataformat='%s'"""
                % (ustr(node.id), metadataformat))
        # XXX: fixXMLString is gone, do we need to sanitize XML here?
        rendered = mask.getViewHTML([node], flags=8)
        # for testing only, remove!
        payload = rendered.replace('lang=""', 'lang="unknown"')
        if DEBUG:
            timetable_update(
                req,
                " in writeRecord: req.write(mask.getViewHTML([node], flags=8)): node.id='%s', metadataformat='%s'"
                % (ustr(node.id), metadataformat))
    else:
        payload = '<recordHasNoXMLRepresentation/>'

    req.write(header + payload + '</metadata></record>')

    if DEBUG:
        timetable_update(
            req,
            "leaving writeRecord: node.id='%s', metadataformat='%s'"
            % (ustr(node.id), metadataformat))
def getNodes(req):
    """Return the next chunk of nodes for an OAI list request.

    A request carrying ``resumptionToken`` continues a listing whose state
    (next position, node list, metadata format) is kept in the module-level
    ``tokenpositions``. Any other request starts a new listing from the
    ``from``, ``until`` and ``set`` request parameters.

    Returns:
        A 3-tuple. On success ``(nodes, tokenstring, metadataformat)``, where
        ``nodes`` is a ``tree.NodeList`` holding at most ``CHUNKSIZE`` nodes
        and ``tokenstring`` is the ``<resumptionToken>`` element to emit, or
        ``None`` when this is the last chunk. On failure
        ``(None, error_code, None)`` with ``error_code`` being
        ``"badResumptionToken"`` or ``"badArgument"``.
    """
    global tokenpositions, CHUNKSIZE
    access = acl.AccessData(req)
    nodes = None

    if "resumptionToken" in req.params:
        token = req.params.get("resumptionToken")
        # Look the token up in one step so that a concurrent removal cannot
        # slip in between a membership test and the read.
        try:
            pos, nodes, metadataformat = tokenpositions[token]
        except KeyError:
            return None, "badResumptionToken", None
        if not checkParams(req, ["verb", "resumptionToken"]):
            OUT("OAI: getNodes: additional arguments (only verb and resumptionToken allowed)")
            return None, "badArgument", None
    else:
        token, metadataformat = new_token(req)
        if not checkMetaDataFormat(metadataformat):
            OUT('OAI: ListRecords: metadataPrefix missing', 'error')
            return None, "badArgument", None
        pos = 0

    if not nodes:
        string_from, string_to = None, None

        date_from = None
        if "from" in req.params:
            try:
                string_from = req.params["from"]
                date_from = parseDate(string_from)
                if date_from.year < EARLIEST_YEAR:
                    date_from = date.DateTime(0, 0, 0, 0, 0, 0)
            except Exception:
                # any failure while interpreting a supplied "from" is a bad argument
                return None, "badArgument", None

        date_to = None
        if "until" in req.params:
            try:
                date_to = parseDate(req.params["until"])
                string_to = req.params.get("until")
                if not date_to.has_time:
                    # a date without time covers the whole day
                    date_to.hour = 23
                    date_to.minute = 59
                    date_to.second = 59
                if date_to.year < EARLIEST_YEAR - 1:
                    raise ValueError("'until' lies before the earliest supported year")
            except Exception:
                return None, "badArgument", None

        setspec = None
        if "set" in req.params:
            setspec = req.params.get("set")

        # "from" must not sort after "until", and both must have the same length
        if string_from and string_to and (string_from > string_to or len(string_from) != len(string_to)):
            return None, "badArgument", None

        try:
            nodes = retrieveNodes(req, access, setspec, date_from, date_to, metadataformat)
            # drop children of media nodes, and nodes that are inactive or
            # older versions of other nodes
            nodes = [n for n in nodes if not parentIsMedia(n) and n.isActiveVersion()]
        except tree.NoSuchNodeError:
            # collection doesn't exist
            return None, "badArgument", None

    is_last_chunk = pos + CHUNKSIZE >= len(nodes)

    # Store the advanced position, or drop the token after the final chunk,
    # inside a single critical section.
    with token_lock:
        if is_last_chunk:
            tokenpositions.pop(token, None)
        else:
            tokenpositions[token] = pos + CHUNKSIZE, nodes, metadataformat

    if is_last_chunk:
        tokenstring = None
    else:
        tokenstring = '<resumptionToken expirationDate="' + ISO8601(date.now().add(3600 * 24)) + '" ' + \
            'completeListSize="' + str(len(nodes)) + '" cursor="' + str(pos) + '">' + token + '</resumptionToken>'

    OUT(req.params.get('verb') + ": set=" + str(req.params.get('set')) +
        ", " + str(len(nodes)) + " objects, format=" + metadataformat)

    res = tree.NodeList(nodes[pos:pos + CHUNKSIZE])
    if DEBUG:
        timetable_update(
            req,
            "leaving getNodes: returning %d nodes, tokenstring='%s', metadataformat='%s'"
            % (len(res), tokenstring, metadataformat))
    return res, tokenstring, metadataformat
def writeRecord(req, node, metadataformat):
    """Write a single OAI ``<record>`` element for ``node`` to ``req``.

    The record is streamed in pieces through ``req.write``: header, metadata
    payload, closing tags.

    Parameters:
        req: request object; ``req.write`` receives the output (it is also
            passed on to ``initSetList`` and ``timetable_update``).
        node: node to export; ``node.id``, ``node.type``,
            ``node.get(DATEFIELD)`` and ``node.getSchema()`` are used.
        metadataformat: requested metadata format. ``"mediatum"`` selects the
            native single-node XML. Otherwise the schema's
            ``"oai_" + metadataformat.lower()`` mask is used if
            ``nodeHasOAIExportMask`` reports one; failing that, a
            ``<recordHasNoXMLRepresentation/>`` placeholder is written.
    """
    if not SET_LIST:
        initSetList(req)

    updatetime = node.get(DATEFIELD)
    if updatetime:
        d = ISO8601(date.parse_date(updatetime))
    else:
        # nodes without an update time get the last second before EARLIEST_YEAR
        d = ISO8601(date.DateTime(EARLIEST_YEAR - 1, 12, 31, 23, 59, 59))

    set_specs = getSetSpecsForNode(node)

    if DEBUG:
        timetable_update(
            req,
            " in writeRecord: getSetSpecsForNode: node: '%s, %s', metadataformat='%s' set_specs:%s"
            % (str(node.id), node.type, metadataformat, str(set_specs)))

    req.write(""" <record> <header><identifier>%s</identifier> <datestamp>%sZ</datestamp> %s </header> <metadata>""" % (
        mkIdentifier(node.id), d, set_specs))

    if DEBUG:
        timetable_update(
            req,
            " in writeRecord: writing header: node.id='%s', metadataformat='%s'"
            % (str(node.id), metadataformat))

    if metadataformat == "mediatum":
        req.write(core.xmlnode.getSingleNodeXML(node))
    elif nodeHasOAIExportMask(node, metadataformat.lower()):
        mask = getMetaType(node.getSchema()).getMask("oai_" + metadataformat.lower())
        if DEBUG:
            timetable_update(
                req,
                """ in writeRecord: mask = getMetaType(node.getSchema()).getMask("oai_"+metadataformat.lower()): node.id='%s', metadataformat='%s'"""
                % (str(node.id), metadataformat))

        # Render once and write once. Only the XML fixer is guarded, so a
        # failing req.write can never cause the payload to be written twice.
        rendered = mask.getViewHTML([node], flags=8)
        try:
            rendered = fixXMLString(rendered)  # fix xml errors
        except Exception:
            # best effort: fall back to the unfixed markup
            pass
        req.write(rendered)

        if DEBUG:
            timetable_update(
                req,
                " in writeRecord: req.write(mask.getViewHTML([node], flags=8)): node.id='%s', metadataformat='%s'"
                % (str(node.id), metadataformat))
    else:
        req.write('<recordHasNoXMLRepresentation/>')

    req.write('</metadata></record>')

    if DEBUG:
        timetable_update(
            req,
            "leaving writeRecord: node.id='%s', metadataformat='%s'"
            % (str(node.id), metadataformat))
def getNodes(req):
    """Return the next chunk of node ids for an OAI list request.

    A request carrying ``resumptionToken`` continues a listing whose state
    (next position, id list, metadata format) is kept in the module-level
    ``tokenpositions``. Any other request starts a new listing from the
    ``from``, ``until`` and ``set`` request parameters.

    Returns:
        A 3-tuple. On success ``(nids, tokenstring, metadataformat)``, where
        ``nids`` is a list of at most ``CHUNKSIZE`` node ids and
        ``tokenstring`` is the ``<resumptionToken>`` element to emit, or
        ``None`` when this is the last chunk. On failure
        ``(None, error_code, None)`` with ``error_code`` being
        ``"badResumptionToken"``, ``"badArgument"`` or ``"noRecordsMatch"``.
    """
    global tokenpositions, CHUNKSIZE
    # Function-scope imports, resolved before token_lock is taken.
    import time
    from sqlalchemy.orm import load_only

    nids = None

    if "resumptionToken" in req.params:
        token = req.params.get("resumptionToken")
        # Look the token up in one step so that a concurrent removal cannot
        # slip in between a membership test and the read.
        try:
            pos, nids, metadataformat = tokenpositions[token]
        except KeyError:
            return None, "badResumptionToken", None
        if not checkParams(req, ["verb", "resumptionToken"]):
            logg.info("OAI: getNodes: additional arguments (only verb and resumptionToken allowed)")
            return None, "badArgument", None
    else:
        token, metadataformat = new_token(req)
        if not checkMetaDataFormat(metadataformat):
            logg.info('OAI: ListRecords: metadataPrefix missing')
            return None, "badArgument", None
        pos = 0

    if not nids:
        string_from, string_to = None, None

        date_from = None
        if "from" in req.params:
            try:
                string_from = req.params["from"]
                date_from = parseDate(string_from)
                if date_from.year < EARLIEST_YEAR:
                    date_from = date.DateTime(0, 0, 0, 0, 0, 0)
            except Exception:
                # any failure while interpreting a supplied "from" is a bad argument
                return None, "badArgument", None

        date_to = None
        if "until" in req.params:
            try:
                date_to = parseDate(req.params["until"])
                string_to = req.params.get("until")
                if not date_to.has_time:
                    # a date without time covers the whole day
                    date_to.hour = 23
                    date_to.minute = 59
                    date_to.second = 59
                if date_to.year < EARLIEST_YEAR - 1:
                    raise ValueError("'until' lies before the earliest supported year")
            except Exception:
                return None, "badArgument", None

        setspec = None
        if "set" in req.params:
            setspec = req.params.get("set")
            if not oaisets.existsSetSpec(setspec):
                return None, "noRecordsMatch", None

        # "from" must not sort after "until", and both must have the same length
        if string_from and string_to and (string_from > string_to or len(string_from) != len(string_to)):
            return None, "badArgument", None

        try:
            nodequery = retrieveNodes(req, setspec, date_from, date_to, metadataformat)
            nodequery = nodequery.filter(Node.subnode == False)
        except Exception:
            logg.exception('error retrieving nodes for oai')
            # collection doesn't exist
            return None, "badArgument", None

    with token_lock:
        # NOTE(review): the id query runs while token_lock is held, which
        # serializes concurrent list requests - confirm this is intended.
        if not nids:
            started = time.time()
            nodes = nodequery.options(load_only('id')).all()
            logg.info('querying %d nodes for tokenposition took %.3f sec.',
                      len(nodes), time.time() - started)

            started = time.time()
            nids = [n.id for n in nodes]
            logg.info('retrieving %d nids for tokenposition took %.3f sec.',
                      len(nids), time.time() - started)

        # Store the advanced position, or drop the token after the final
        # chunk, inside the same critical section.
        is_last_chunk = pos + CHUNKSIZE >= len(nids)
        if is_last_chunk:
            tokenpositions.pop(token, None)
        else:
            tokenpositions[token] = pos + CHUNKSIZE, nids, metadataformat

    if is_last_chunk:
        tokenstring = None
    else:
        tokenstring = '<resumptionToken expirationDate="' + ISO8601(date.now().add(3600 * 24)) + '" ' + \
            'completeListSize="' + ustr(len(nids)) + '" cursor="' + ustr(pos) + '">' + token + '</resumptionToken>'

    logg.info("%s : set=%s, objects=%s, format=%s",
              req.params.get('verb'), req.params.get('set'), len(nids), metadataformat)

    res = nids[pos:pos + CHUNKSIZE]
    if DEBUG:
        timetable_update(
            req,
            "leaving getNodes: returning %d nodes, tokenstring='%s', metadataformat='%s'"
            % (len(res), tokenstring, metadataformat))
    return res, tokenstring, metadataformat