Exemplo n.º 1
0
def writeRecord(req, node, metadataformat, mask=None):
    """Serialize ``node`` as one ``<record>`` element and write it to ``req``.

    The record consists of a header (identifier, datestamp, set specs) and a
    ``<metadata>`` section whose content depends on ``metadataformat``:

    * ``"mediatum"``: the XML returned by ``core.xmlnode.getSingleNodeXML``.
    * any other format with a truthy ``mask``: the mask's
      ``getViewHTML([node], flags=8)`` output.
    * otherwise: a ``<recordHasNoXMLRepresentation/>`` placeholder.

    The complete record is assembled in memory and emitted with a single
    ``req.write`` call. When ``DEBUG`` is set, progress is reported through
    ``timetable_update``.

    :param req: request object; must provide ``write``.
    :param node: node to export.
    :param metadataformat: requested metadata format name.
    :param mask: optional export mask used for non-"mediatum" formats.
    """
    if not SET_LIST:
        initSetList(req)

    # Nodes without a DATEFIELD value get the last second of the year
    # preceding EARLIEST_YEAR as their datestamp.
    raw_stamp = node.get(DATEFIELD)
    if raw_stamp:
        stamp_source = date.parse_date(raw_stamp)
    else:
        stamp_source = date.DateTime(EARLIEST_YEAR - 1, 12, 31, 23, 59, 59)
    datestamp = ISO8601(stamp_source)

    set_specs = getSetSpecsForNode(node)

    if DEBUG:
        timetable_update(
            req,
            " in writeRecord: getSetSpecsForNode: node: '%s, %s', metadataformat='%s' set_specs:%s"
            % (ustr(node.id), node.type, metadataformat, ustr(set_specs)))

    header = """
           <record>
               <header><identifier>%s</identifier>
                       <datestamp>%sZ</datestamp>
                       %s
               </header>
               <metadata>""" % (mkIdentifier(node.id), datestamp, set_specs)

    if DEBUG:
        timetable_update(
            req,
            " in writeRecord: writing header: node.id='%s', metadataformat='%s'"
            % (ustr(node.id), metadataformat))

    if metadataformat == "mediatum":
        payload = core.xmlnode.getSingleNodeXML(node)
    elif mask:
        if DEBUG:
            timetable_update(
                req,
                """ in writeRecord: mask = getMetaType(node.getSchema()).getMask(u"oai_"+metadataformat.lower()): node.id='%s', metadataformat='%s'"""
                % (ustr(node.id), metadataformat))
        # XXX: fixXMLString is gone, do we need to sanitize XML here?
        rendered = mask.getViewHTML([node], flags=8)
        # The empty-lang substitution is marked "for testing only, remove!"
        # upstream; it is kept so the output stays unchanged.
        payload = rendered.replace('lang=""', 'lang="unknown"')
        if DEBUG:
            timetable_update(
                req,
                " in writeRecord: req.write(mask.getViewHTML([node], flags=8)): node.id='%s', metadataformat='%s'"
                % (ustr(node.id), metadataformat))
    else:
        payload = '<recordHasNoXMLRepresentation/>'

    req.write(header + payload + '</metadata></record>')

    if DEBUG:
        timetable_update(
            req,
            "leaving writeRecord: node.id='%s', metadataformat='%s'"
            % (ustr(node.id), metadataformat))
Exemplo n.º 2
0
def getNodes(req):
    """Return one chunk of nodes for an OAI list request.

    A request carrying ``resumptionToken`` continues a previously started
    listing from the position stored in ``tokenpositions``. Any other request
    starts a new listing: the ``from``/``until``/``set`` parameters are
    validated, the matching nodes are retrieved and filtered, and a new token
    is registered if the result does not fit into one chunk of ``CHUNKSIZE``.

    :param req: request object; ``req.params`` holds the OAI arguments.
    :returns: ``(nodes, tokenstring, metadataformat)`` on success, where
        ``nodes`` is a ``tree.NodeList`` with at most ``CHUNKSIZE`` entries and
        ``tokenstring`` is a ``<resumptionToken>`` element or ``None`` for the
        last chunk. On failure ``(None, error_code, None)`` with ``error_code``
        being ``"badResumptionToken"`` or ``"badArgument"``.
    """
    global tokenpositions, CHUNKSIZE
    access = acl.AccessData(req)
    nodes = None

    if "resumptionToken" in req.params:
        token = req.params.get("resumptionToken")
        # Check and read in one critical section: writers and deleters of
        # tokenpositions hold token_lock, so an unlocked "in" followed by a
        # lookup could hit an entry that was removed in between.
        with token_lock:
            token_known = token in tokenpositions
            if token_known:
                pos, nodes, metadataformat = tokenpositions[token]
        if not token_known:
            return None, "badResumptionToken", None

        if not checkParams(req, ["verb", "resumptionToken"]):
            OUT("OAI: getNodes: additional arguments (only verb and resumptionToken allowed)")
            return None, "badArgument", None
    else:
        token, metadataformat = new_token(req)
        if not checkMetaDataFormat(metadataformat):
            OUT('OAI: ListRecords: metadataPrefix missing', 'error')
            return None, "badArgument", None
        pos = 0

    if not nodes:
        string_from, string_to = None, None

        # "from": optional; any failure while reading or parsing a supplied
        # value is reported as badArgument.
        date_from = None
        if "from" in req.params:
            try:
                string_from = req.params["from"]
                date_from = parseDate(string_from)
                if date_from.year < EARLIEST_YEAR:
                    date_from = date.DateTime(0, 0, 0, 0, 0, 0)
            except Exception:
                return None, "badArgument", None

        # "until": optional; a date without a time part is extended to the
        # end of that day. Dates before EARLIEST_YEAR - 1 are rejected.
        date_to = None
        if "until" in req.params:
            try:
                date_to = parseDate(req.params["until"])
                string_to = req.params.get("until")
                if not date_to.has_time:
                    date_to.hour = 23
                    date_to.minute = 59
                    date_to.second = 59
                if date_to.year < EARLIEST_YEAR - 1:
                    return None, "badArgument", None
            except Exception:
                return None, "badArgument", None

        setspec = None
        if "set" in req.params:
            setspec = req.params.get("set")

        # Both bounds must use the same string length (i.e. the same
        # granularity) and "from" must not sort after "until".
        if string_from and string_to and (string_from > string_to or
                                          len(string_from) != len(string_to)):
            return None, "badArgument", None

        try:
            nodes = retrieveNodes(req, access, setspec, date_from, date_to,
                                  metadataformat)
            nodes = [n for n in nodes if not parentIsMedia(n)]
            # filter out nodes that are inactive or older versions of other nodes
            nodes = [n for n in nodes if n.isActiveVersion()]
        except tree.NoSuchNodeError:
            # collection doesn't exist
            return None, "badArgument", None

    next_pos = pos + CHUNKSIZE
    has_more = next_pos < len(nodes)

    # Update the token table in a single critical section: remember the next
    # position while chunks remain, otherwise drop a possibly existing entry.
    with token_lock:
        if has_more:
            tokenpositions[token] = next_pos, nodes, metadataformat
        elif token in tokenpositions:
            del tokenpositions[token]

    if has_more:
        tokenstring = '<resumptionToken expirationDate="' + ISO8601(date.now().add(3600 * 24)) + '" ' + \
            'completeListSize="' + str(len(nodes)) + '" cursor="' + str(pos) + '">' + token + '</resumptionToken>'
    else:
        tokenstring = None

    OUT(
        req.params.get('verb') + ": set=" + str(req.params.get('set')) + ", " +
        str(len(nodes)) + " objects, format=" + metadataformat)
    res = tree.NodeList(nodes[pos:next_pos])
    if DEBUG:
        timetable_update(
            req,
            "leaving getNodes: returning %d nodes, tokenstring='%s', metadataformat='%s'"
            % (len(res), tokenstring, metadataformat))

    return res, tokenstring, metadataformat
Exemplo n.º 3
0
def writeRecord(req, node, metadataformat):
    """Stream one ``<record>`` element for ``node`` to ``req``.

    The header (identifier, datestamp, set specs) is written first, followed
    by the metadata section:

    * ``"mediatum"``: the XML returned by ``core.xmlnode.getSingleNodeXML``.
    * a format for which ``nodeHasOAIExportMask`` is true: the output of the
      schema's ``oai_<format>`` mask, passed through ``fixXMLString`` when
      that succeeds and written unmodified otherwise.
    * otherwise: a ``<recordHasNoXMLRepresentation/>`` placeholder.

    When ``DEBUG`` is set, progress is reported through ``timetable_update``.

    :param req: request object; must provide ``write``.
    :param node: node to export.
    :param metadataformat: requested metadata format name.
    """
    if not SET_LIST:
        initSetList(req)

    # Nodes without a DATEFIELD value get the last second of the year
    # preceding EARLIEST_YEAR as their datestamp.
    updatetime = node.get(DATEFIELD)
    if updatetime:
        d = ISO8601(date.parse_date(updatetime))
    else:
        d = ISO8601(date.DateTime(EARLIEST_YEAR - 1, 12, 31, 23, 59, 59))

    set_specs = getSetSpecsForNode(node)

    if DEBUG:
        timetable_update(
            req,
            " in writeRecord: getSetSpecsForNode: node: '%s, %s', metadataformat='%s' set_specs:%s"
            % (str(node.id), node.type, metadataformat, str(set_specs)))

    req.write("""
           <record>
               <header><identifier>%s</identifier>
                       <datestamp>%sZ</datestamp>
                       %s
               </header>
               <metadata>""" % (mkIdentifier(node.id), d, set_specs))

    if DEBUG:
        timetable_update(
            req,
            " in writeRecord: writing header: node.id='%s', metadataformat='%s'"
            % (str(node.id), metadataformat))

    if metadataformat == "mediatum":
        req.write(core.xmlnode.getSingleNodeXML(node))
    elif nodeHasOAIExportMask(node, metadataformat.lower()):
        mask = getMetaType(node.getSchema()).getMask("oai_" +
                                                     metadataformat.lower())
        if DEBUG:
            timetable_update(
                req,
                """ in writeRecord: mask = getMetaType(node.getSchema()).getMask("oai_"+metadataformat.lower()): node.id='%s', metadataformat='%s'"""
                % (str(node.id), metadataformat))
        # Render the mask exactly once. Only the XML repair step is treated
        # as best effort; a failing req.write must not trigger a second
        # write of the same record body.
        rendered = mask.getViewHTML([node], flags=8)
        try:
            output = fixXMLString(rendered)  # fix xml errors
        except Exception:
            output = rendered
        req.write(output)
        if DEBUG:
            timetable_update(
                req,
                " in writeRecord: req.write(mask.getViewHTML([node], flags=8)): node.id='%s', metadataformat='%s'"
                % (str(node.id), metadataformat))

    else:
        req.write('<recordHasNoXMLRepresentation/>')

    req.write('</metadata></record>')

    if DEBUG:
        timetable_update(
            req, "leaving writeRecord: node.id='%s', metadataformat='%s'" %
            (str(node.id), metadataformat))
Exemplo n.º 4
0
def getNodes(req):
    """Return one chunk of node ids for an OAI list request.

    A request carrying ``resumptionToken`` continues a previously started
    listing from the position stored in ``tokenpositions``. Any other request
    starts a new listing: the ``from``/``until``/``set`` parameters are
    validated, the matching node ids are queried, and a new token is
    registered if the result does not fit into one chunk of ``CHUNKSIZE``.

    :param req: request object; ``req.params`` holds the OAI arguments.
    :returns: ``(nids, tokenstring, metadataformat)`` on success, where
        ``nids`` is a list with at most ``CHUNKSIZE`` node ids and
        ``tokenstring`` is a ``<resumptionToken>`` element or ``None`` for the
        last chunk. On failure ``(None, error_code, None)`` with ``error_code``
        being ``"badResumptionToken"``, ``"badArgument"`` or
        ``"noRecordsMatch"``.
    """
    global tokenpositions, CHUNKSIZE
    nids = None

    if "resumptionToken" in req.params:
        token = req.params.get("resumptionToken")
        # Check and read in one critical section: writers and deleters of
        # tokenpositions hold token_lock, so an unlocked "in" followed by a
        # lookup could hit an entry that was removed in between.
        with token_lock:
            token_known = token in tokenpositions
            if token_known:
                pos, nids, metadataformat = tokenpositions[token]
        if not token_known:
            return None, "badResumptionToken", None

        if not checkParams(req, ["verb", "resumptionToken"]):
            logg.info("OAI: getNodes: additional arguments (only verb and resumptionToken allowed)")
            return None, "badArgument", None
    else:
        token, metadataformat = new_token(req)
        if not checkMetaDataFormat(metadataformat):
            logg.info('OAI: ListRecords: metadataPrefix missing')
            return None, "badArgument", None
        pos = 0

    if not nids:
        string_from, string_to = None, None

        # "from": optional; any failure while reading or parsing a supplied
        # value is reported as badArgument.
        date_from = None
        if "from" in req.params:
            try:
                string_from = req.params["from"]
                date_from = parseDate(string_from)
                if date_from.year < EARLIEST_YEAR:
                    date_from = date.DateTime(0, 0, 0, 0, 0, 0)
            except Exception:
                return None, "badArgument", None

        # "until": optional; a date without a time part is extended to the
        # end of that day. Dates before EARLIEST_YEAR - 1 are rejected.
        date_to = None
        if "until" in req.params:
            try:
                date_to = parseDate(req.params["until"])
                string_to = req.params.get("until")
                if not date_to.has_time:
                    date_to.hour = 23
                    date_to.minute = 59
                    date_to.second = 59
                if date_to.year < EARLIEST_YEAR - 1:
                    return None, "badArgument", None
            except Exception:
                return None, "badArgument", None

        setspec = None
        if "set" in req.params:
            setspec = req.params.get("set")
            if not oaisets.existsSetSpec(setspec):
                return None, "noRecordsMatch", None

        # Both bounds must use the same string length (i.e. the same
        # granularity) and "from" must not sort after "until".
        if string_from and string_to and (string_from > string_to or len(string_from) != len(string_to)):
            return None, "badArgument", None

        try:
            nodequery = retrieveNodes(req, setspec, date_from, date_to, metadataformat)
            # Restrict to nodes whose subnode flag is false; per the former
            # inline note this replaces the old parentIsMedia() list filter.
            nodequery = nodequery.filter(Node.subnode == False)
        except Exception:
            logg.exception('error retrieving nodes for oai')
            # collection doesn't exist
            return None, "badArgument", None

        # Execute the query BEFORE taking token_lock: the lock guards only
        # the shared token table and should not be held during database work.
        import time
        from sqlalchemy.orm import load_only
        atime = time.time()
        rows = nodequery.options(load_only('id')).all()
        etime = time.time()
        logg.info('querying %d nodes for tokenposition took %.3f sec.', len(rows), etime - atime)
        atime = time.time()
        nids = [row.id for row in rows]
        etime = time.time()
        logg.info('retrieving %d nids for tokenposition took %.3f sec.', len(nids), etime - atime)

    next_pos = pos + CHUNKSIZE
    has_more = next_pos < len(nids)

    # Update the token table in a single critical section: remember the next
    # position while chunks remain, otherwise drop a possibly existing entry.
    with token_lock:
        if has_more:
            tokenpositions[token] = next_pos, nids, metadataformat
        elif token in tokenpositions:
            del tokenpositions[token]

    if has_more:
        tokenstring = '<resumptionToken expirationDate="' + ISO8601(date.now().add(3600 * 24)) + '" ' + \
            'completeListSize="' + ustr(len(nids)) + '" cursor="' + ustr(pos) + '">' + token + '</resumptionToken>'
    else:
        tokenstring = None

    logg.info("%s : set=%s, objects=%s, format=%s", req.params.get('verb'), req.params.get('set'), len(nids), metadataformat)
    res = nids[pos:next_pos]
    if DEBUG:
        timetable_update(req, "leaving getNodes: returning %d nodes, tokenstring='%s', metadataformat='%s'" %
                         (len(res), tokenstring, metadataformat))

    return res, tokenstring, metadataformat