def test_verbs(self):
    """oairepository - testing verbs"""
    # Each OAI verb handler is called with an empty argument string; the
    # only thing verified is that the verb name occurs somewhere in what
    # the handler returns.

    def expect_verb(verb, response):
        # re.search() returns None when the verb name is absent.
        self.assertNotEqual(None, re.search(verb, response))

    expect_verb("Identify",
                oai_repository_server.oaiidentify("", None))
    expect_verb("ListIdentifiers",
                oai_repository_server.oailistidentifiers(""))
    expect_verb("ListRecords",
                oai_repository_server.oailistrecords(""))
    expect_verb("ListMetadataFormats",
                oai_repository_server.oailistmetadataformats(""))
    expect_verb("ListSets",
                oai_repository_server.oailistsets(""))
    expect_verb("GetRecord",
                oai_repository_server.oaigetrecord(""))
def test_verbs(self):
    """oairepository - testing verbs"""
    # Each OAI verb handler is called with an empty argument string; the
    # only thing verified is that the verb name occurs somewhere in what
    # the handler returns.

    def expect_verb(verb, response):
        # re.search() returns None when the verb name is absent.
        self.assertNotEqual(None, re.search(verb, response))

    expect_verb("Identify",
                oai_repository_server.oaiidentify("", None))
    expect_verb("ListIdentifiers",
                oai_repository_server.oailistidentifiers(""))
    expect_verb("ListRecords",
                oai_repository_server.oailistrecords(""))
    expect_verb("ListMetadataFormats",
                oai_repository_server.oailistmetadataformats(""))
    expect_verb("ListSets",
                oai_repository_server.oailistsets(""))
    expect_verb("GetRecord",
                oai_repository_server.oaigetrecord(""))
def test_from_and_until(self):
    """oairepository - testing selective harvesting with 'from' and 'until' parameters"""
    # List available records, get datestamps and play with them.
    # findall() with two groups yields (oai id, datestamp) tuples.
    identifiers = oai_repository_server.oailistidentifiers("")
    datestamps = re.findall(
        r'<identifier>(?P<id>.*)</identifier>\s*<datestamp>(?P<date>.*)</datestamp>',
        identifiers)

    # There must be some datestamps.  This is checked before the first
    # entry is used, so that an empty listing is reported as a test
    # failure and not as an IndexError.
    self.assertNotEqual([], datestamps)

    # Take one oai id together with its datestamp
    sample_oai_id, sample_datestamp = datestamps[0]
    # Find corresponding system number id
    sample_id = search_engine.perform_request_search(
        p=sample_oai_id, f=CFG_OAI_ID_FIELD)[0]

    # We must be able to retrieve an id with the date we have just found
    self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(
        fromdate=sample_datestamp))
    self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(
        untildate=sample_datestamp))
    self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(
        untildate=sample_datestamp, fromdate=sample_datestamp))

    # Same, with short format date. Eg 2007-12-13
    sample_day = sample_datestamp.split('T')[0]
    self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(
        fromdate=sample_day))
    self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(
        untildate=sample_day))
    self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(
        fromdate=sample_day, untildate=sample_day))

    # The year is shifted by rewriting the first four characters of the
    # datestamp.  NOTE: for a 29 February datestamp this produces a date
    # that does not exist in the neighbouring year.
    sample_datestamp_year = int(sample_datestamp[0:4])
    sample_datestamp_rest = sample_datestamp[4:]

    # At later date (year after) we should not find our id again
    later_datestamp = str(sample_datestamp_year + 1) + sample_datestamp_rest
    self.assertTrue(sample_id not in oai_repository_server.oaigetsysnolist(
        fromdate=later_datestamp))

    # At earlier date (year before) we should not find our id again
    earlier_datestamp = str(sample_datestamp_year - 1) + sample_datestamp_rest
    self.assertTrue(sample_id not in oai_repository_server.oaigetsysnolist(
        untildate=earlier_datestamp))

    # From earliest date to latest date must include all oai records.
    # Map each parsed time to its original datestamp string, so that the
    # extremes can be passed back in the format they were listed in.
    dates = dict([(time.mktime(time.strptime(stamp[1], "%Y-%m-%dT%H:%M:%SZ")),
                   stamp[1])
                  for stamp in datestamps])
    earliest_datestamp = dates[min(dates)]
    latest_datestamp = dates[max(dates)]
    self.assertEqual(
        len(oai_repository_server.oaigetsysnolist()),
        len(oai_repository_server.oaigetsysnolist(
            fromdate=earliest_datestamp, untildate=latest_datestamp)))
def test_from_and_until(self):
    """oairepository - testing selective harvesting with 'from' and 'until' parameters"""
    # List available records, get datestamps and play with them.
    # findall() with two groups yields (oai id, datestamp) tuples.
    identifiers = oai_repository_server.oailistidentifiers("")
    datestamps = re.findall(
        r'<identifier>(?P<id>.*)</identifier>\s*<datestamp>(?P<date>.*)</datestamp>',
        identifiers)

    # There must be some datestamps.  This is checked before the first
    # entry is used, so that an empty listing is reported as a test
    # failure and not as an IndexError.
    self.assertNotEqual([], datestamps)

    # Take one oai id together with its datestamp
    sample_oai_id, sample_datestamp = datestamps[0]
    # Find corresponding system number id
    sample_id = search_engine.perform_request_search(
        p=sample_oai_id, f=CFG_OAI_ID_FIELD)[0]

    # We must be able to retrieve an id with the date we have just found
    self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(
        fromdate=sample_datestamp))
    self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(
        untildate=sample_datestamp))
    self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(
        untildate=sample_datestamp, fromdate=sample_datestamp))

    # Same, with short format date. Eg 2007-12-13
    sample_day = sample_datestamp.split('T')[0]
    self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(
        fromdate=sample_day))
    self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(
        untildate=sample_day))
    self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(
        fromdate=sample_day, untildate=sample_day))

    # The year is shifted by rewriting the first four characters of the
    # datestamp.  NOTE: for a 29 February datestamp this produces a date
    # that does not exist in the neighbouring year.
    sample_datestamp_year = int(sample_datestamp[0:4])
    sample_datestamp_rest = sample_datestamp[4:]

    # At later date (year after) we should not find our id again
    later_datestamp = str(sample_datestamp_year + 1) + sample_datestamp_rest
    self.assertTrue(sample_id not in oai_repository_server.oaigetsysnolist(
        fromdate=later_datestamp))

    # At earlier date (year before) we should not find our id again
    earlier_datestamp = str(sample_datestamp_year - 1) + sample_datestamp_rest
    self.assertTrue(sample_id not in oai_repository_server.oaigetsysnolist(
        untildate=earlier_datestamp))

    # From earliest date to latest date must include all oai records.
    # Map each parsed time to its original datestamp string, so that the
    # extremes can be passed back in the format they were listed in.
    dates = dict([(time.mktime(time.strptime(stamp[1], "%Y-%m-%dT%H:%M:%SZ")),
                   stamp[1])
                  for stamp in datestamps])
    earliest_datestamp = dates[min(dates)]
    latest_datestamp = dates[max(dates)]
    self.assertEqual(
        len(oai_repository_server.oaigetsysnolist()),
        len(oai_repository_server.oaigetsysnolist(
            fromdate=earliest_datestamp, untildate=latest_datestamp)))
def __call__(self, req, form):
    """OAI repository interface

    Wash the request arguments, check them with
    oai_repository_server.check_argd(), enforce the CFG_OAI_SLEEP pause
    between requests and write the XML response for the requested verb.

    Parameters:
      req  - request object (presumably a mod_python request; confirm
             against the caller).  Its content_type, status and
             headers_out are set here and the response is written
             through req.write().
      form - submitted request arguments, in the form accepted by
             wash_urlargd().

    Returns a single newline once the response has been written, or a
    'Retry after N seconds' message (with HTTP status 503) when the
    request arrives less than CFG_OAI_SLEEP seconds after the previous
    one.
    """
    # Clean input arguments. The protocol specifies that an error
    # has to be returned if the same argument is specified several
    # times. Eg:
    # oai2d?verb=ListIdentifiers&metadataPrefix=marcxml&metadataPrefix=marcxml
    # So keep the arguments as list for now so that check_argd can
    # return an error if needed (check_argd also transforms these
    # lists into strings)
    argd = wash_urlargd(form, {'verb': (list, []),
                               'metadataPrefix': (list, []),
                               'from': (list, []),
                               'until': (list, []),
                               'set': (list, []),
                               'identifier': (list, []),
                               'resumptionToken': (list, []),
                               })

    ## wash_urlargd(..) function cleaned everything, but also added
    ## unwanted parameters. Remove them now.  Entries are deleted
    ## inside the loop, so iterate over a snapshot of the keys.
    for param in list(argd.keys()):
        if param not in form and param != 'verb':
            del argd[param]

    ## wash_urlargd(..) function also removed unknown parameters
    ## that we would like to keep in order to send back an error
    ## as required by the protocol. But we do not need that value,
    ## so set it to empty string.
    for param in form.keys():
        if param not in argd:
            argd[param] = ''

    ## But still remove 'ln' parameter that was automatically added.
    if 'ln' in argd:
        del argd['ln']

    ## check request for OAI compliancy
    ## also transform all the list arguments into string
    oai_error = oai_repository_server.check_argd(argd)

    ## check availability (OAI requests for Identify, ListSets and
    ## ListMetadataFormats are served immediately, otherwise we
    ## shall wait for CFG_OAI_SLEEP seconds between requests).
    ## The modification time of this file records the last request.
    timestamp_path = "%s/RTdata/RTdata" % CFG_CACHEDIR
    if os.path.exists(timestamp_path) and \
           argd['verb'] not in ["Identify", "ListMetadataFormats", "ListSets"]:
        time_gap = int(time.time() - os.path.getmtime(timestamp_path))
        if time_gap < CFG_OAI_SLEEP:
            retry_after = CFG_OAI_SLEEP - time_gap
            req.headers_out["Status-Code"] = "503"
            req.headers_out["Retry-After"] = "%d" % retry_after
            req.status = apache.HTTP_SERVICE_UNAVAILABLE
            return "Retry after %d seconds" % retry_after

    ## Refresh the timestamp file (create it if needed) without
    ## spawning a shell.  This stays best-effort: a failure to touch
    ## the file must not prevent the request from being served.
    try:
        open(timestamp_path, "a").close()
        os.utime(timestamp_path, None)
    except (IOError, OSError):
        pass

    ## construct args (argd string equivalent) for the
    ## oai_repository_server business logic (later it may be good if it
    ## takes argd directly):
    args = urllib.urlencode(argd)

    ## create OAI response
    req.content_type = "text/xml"
    req.send_http_header()

    if oai_error == "":
        verb = argd['verb']

        ## OAI Identify
        if verb == "Identify":
            req.write(oai_repository_server.oaiidentify(args, script_url=req.uri))

        ## OAI ListSets
        elif verb == "ListSets":
            req.write(oai_repository_server.oailistsets(args))

        ## OAI ListIdentifiers
        elif verb == "ListIdentifiers":
            req.write(oai_repository_server.oailistidentifiers(args))

        ## OAI ListRecords
        elif verb == "ListRecords":
            req.write(oai_repository_server.oailistrecords(args))

        ## OAI GetRecord
        elif verb == "GetRecord":
            req.write(oai_repository_server.oaigetrecord(args))

        ## OAI ListMetadataFormats
        elif verb == "ListMetadataFormats":
            req.write(oai_repository_server.oailistmetadataformats(args))

        ## Unknown verb
        else:
            req.write(oai_repository_server.oai_error("badVerb", "Illegal OAI verb"))

    ## OAI error
    else:
        req.write(oai_repository_server.oai_header(args, ""))
        req.write(oai_error)
        req.write(oai_repository_server.oai_footer(""))

    return "\n"
def __call__(self, req, form):
    """OAI repository interface

    Wash the request arguments, check them with
    oai_repository_server.check_argd(), enforce the CFG_OAI_SLEEP pause
    between requests and write the XML response for the requested verb.

    Parameters:
      req  - request object (presumably a mod_python request; confirm
             against the caller).  Its content_type, status and
             headers_out are set here and the response is written
             through req.write().
      form - submitted request arguments, in the form accepted by
             wash_urlargd().

    Returns a single newline once the response has been written, or a
    'Retry after N seconds' message (with HTTP status 503) when the
    request arrives less than CFG_OAI_SLEEP seconds after the previous
    one.
    """
    # Clean input arguments. The protocol specifies that an error
    # has to be returned if the same argument is specified several
    # times. Eg:
    # oai2d?verb=ListIdentifiers&metadataPrefix=marcxml&metadataPrefix=marcxml
    # So keep the arguments as list for now so that check_argd can
    # return an error if needed (check_argd also transforms these
    # lists into strings)
    argd = wash_urlargd(
        form, {
            'verb': (list, []),
            'metadataPrefix': (list, []),
            'from': (list, []),
            'until': (list, []),
            'set': (list, []),
            'identifier': (list, []),
            'resumptionToken': (list, []),
        })

    ## wash_urlargd(..) function cleaned everything, but also added
    ## unwanted parameters. Remove them now.  Entries are deleted
    ## inside the loop, so iterate over a snapshot of the keys.
    for param in list(argd.keys()):
        if param not in form and param != 'verb':
            del argd[param]

    ## wash_urlargd(..) function also removed unknown parameters
    ## that we would like to keep in order to send back an error
    ## as required by the protocol. But we do not need that value,
    ## so set it to empty string.
    for param in form.keys():
        if param not in argd:
            argd[param] = ''

    ## But still remove 'ln' parameter that was automatically added.
    if 'ln' in argd:
        del argd['ln']

    ## check request for OAI compliancy
    ## also transform all the list arguments into string
    oai_error = oai_repository_server.check_argd(argd)

    ## check availability (OAI requests for Identify, ListSets and
    ## ListMetadataFormats are served immediately, otherwise we
    ## shall wait for CFG_OAI_SLEEP seconds between requests).
    ## The modification time of this file records the last request.
    timestamp_path = "%s/RTdata/RTdata" % CFG_CACHEDIR
    if os.path.exists(timestamp_path) and (argd['verb'] not in [
            "Identify", "ListMetadataFormats", "ListSets"
    ]):
        time_gap = int(time.time() - os.path.getmtime(timestamp_path))
        if time_gap < CFG_OAI_SLEEP:
            retry_after = CFG_OAI_SLEEP - time_gap
            req.headers_out["Status-Code"] = "503"
            req.headers_out["Retry-After"] = "%d" % retry_after
            req.status = apache.HTTP_SERVICE_UNAVAILABLE
            return "Retry after %d seconds" % retry_after

    ## Refresh the timestamp file (create it if needed) without
    ## spawning a shell.  This stays best-effort: a failure to touch
    ## the file must not prevent the request from being served.
    try:
        open(timestamp_path, "a").close()
        os.utime(timestamp_path, None)
    except (IOError, OSError):
        pass

    ## construct args (argd string equivalent) for the
    ## oai_repository_server business logic (later it may be good if it
    ## takes argd directly):
    args = urllib.urlencode(argd)

    ## create OAI response
    req.content_type = "text/xml"
    req.send_http_header()

    if oai_error == "":
        verb = argd['verb']

        ## OAI Identify
        if verb == "Identify":
            req.write(
                oai_repository_server.oaiidentify(args, script_url=req.uri))

        ## OAI ListSets
        elif verb == "ListSets":
            req.write(oai_repository_server.oailistsets(args))

        ## OAI ListIdentifiers
        elif verb == "ListIdentifiers":
            req.write(oai_repository_server.oailistidentifiers(args))

        ## OAI ListRecords
        elif verb == "ListRecords":
            req.write(oai_repository_server.oailistrecords(args))

        ## OAI GetRecord
        elif verb == "GetRecord":
            req.write(oai_repository_server.oaigetrecord(args))

        ## OAI ListMetadataFormats
        elif verb == "ListMetadataFormats":
            req.write(oai_repository_server.oailistmetadataformats(args))

        ## Unknown verb
        else:
            req.write(
                oai_repository_server.oai_error("badVerb", "Illegal OAI verb"))

    ## OAI error
    else:
        req.write(oai_repository_server.oai_header(args, ""))
        req.write(oai_error)
        req.write(oai_repository_server.oai_footer(""))

    return "\n"