def headerFromLxmlElement(et):
    """Build a Header from the OAI-PMH header fields found in an lxml element.

    The identifier and datestamp are read with the XPath expressions
    ``string(//oai:identifier)`` and ``string(//oai:datestamp)``, with the
    ``oai`` prefix bound to NS_OAIPMH.

    et - object exposing an lxml-style ``xpath(expr, namespaces=...)`` method

    Returns Header(identifier, datestamp, [], None), where datestamp is a
    naive datetime.datetime. Both OAI-PMH datestamp granularities are
    accepted: ``YYYY-MM-DDThh:mm:ssZ`` and ``YYYY-MM-DD``.

    Raises ValueError if the datestamp text matches neither format (this
    includes a missing datestamp, which XPath ``string()`` reports as an
    empty string).
    """
    # Build the prefix map once and share it between both lookups.
    oai_ns = {'oai': NS_OAIPMH}
    identifier = et.xpath('string(//oai:identifier)', namespaces=oai_ns)
    stamp_text = et.xpath('string(//oai:datestamp)', namespaces=oai_ns)
    try:
        datestamp = datetime.datetime.strptime(stamp_text,
                                               '%Y-%m-%dT%H:%M:%SZ')
    except ValueError:
        # Repositories with day granularity emit bare dates; a second
        # failure here propagates as the ValueError callers already expect.
        datestamp = datetime.datetime.strptime(stamp_text, '%Y-%m-%d')
    return Header(identifier, datestamp, [], None)
def _get_header(self, obj):
    """Build the Header describing *obj*.

    The datestamp is taken from ``obj.last_login`` when obj is a User and
    from ``obj.update_time`` otherwise. It is passed through get_utc_time
    and then stripped of its tzinfo, so the Header carries a naive datetime.
    When the source attribute is None (for example a user that has never
    logged in), the current UTC time is used instead of passing None to
    get_utc_time.

    Returns Header(self.get_id(obj), timestamp, [], None).
    """
    # Imported locally so this method does not depend on the module already
    # importing datetime.
    import datetime

    if isinstance(obj, User):
        raw_time = obj.last_login
    else:
        raw_time = obj.update_time

    if raw_time is None:
        # No recorded time: fall back to "now", expressed in UTC so that it
        # is comparable with the converted timestamps below.
        timestamp = datetime.datetime.utcnow()
    else:
        # Get UTC timestamp
        timestamp = get_utc_time(raw_time).replace(tzinfo=None)
    return Header(self.get_id(obj), timestamp, [], None)
def _get_header(self, obj):
    """Build the Header describing *obj*.

    The datestamp is taken from ``obj.last_login`` when obj is a User and
    from ``obj.update_time`` otherwise. A present value is passed through
    get_utc_time and stripped of its tzinfo; a missing value (None) is
    replaced by the current time. Both paths yield a naive datetime, and
    the fallback is taken in UTC so the two paths agree on the timezone.

    Returns Header(self.get_id(obj), timestamp, [], None).
    """
    if isinstance(obj, User):
        raw_time = obj.last_login
    else:
        raw_time = obj.update_time

    if raw_time is None:
        # utcnow() rather than now(): the other branch produces naive UTC,
        # so a local-time fallback would be offset by the server's timezone.
        timestamp = datetime.datetime.utcnow()
    else:
        # Get UTC timestamp
        timestamp = get_utc_time(raw_time).replace(tzinfo=None)
    return Header(self.get_id(obj), timestamp, [], None)
def listRecords(self, metadataPrefix, set=None, from_=None, until=None,
                cursor=0, batch_size=10):
    """Return one batch of (header, metadata, about) tuples.

    metadataPrefix - identifies the metadata format to retrieve
    set - set identifier (optional); any truthy value is rejected
    from_ - lower date bound, forwarded to self._listResults (optional)
    until - upper date bound, forwarded to self._listResults (optional)
    cursor - number of leading matches to skip
    batch_size - the batch is returned as soon as it holds this many items

    Raises CannotDisseminateFormatError if metadataPrefix is given but is
    not a key of self.protocolMap.recordNamespaces.
    Raises NoSetHierarchyError if a set is requested.
    """
    if (metadataPrefix and
            metadataPrefix not in self.protocolMap.recordNamespaces):
        raise CannotDisseminateFormatError()
    # Cheshire3 does not support sets
    if set:
        raise NoSetHierarchyError()

    registry = self.metadataRegistry
    if not registry.hasWriter(metadataPrefix):
        # oaipmh needs a metadata writer for this schema; build one around
        # the transformer configured for it (None if there is none) and
        # register it for later calls.
        schema_id = self.protocolMap.recordNamespaces[metadataPrefix]
        transformer = self.protocolMap.transformerHash.get(schema_id, None)
        registry.registerWriter(metadataPrefix,
                                Cheshire3OaiMetadataWriter(transformer))

    # self._listResults yields (datestamp, resultSet) pairs; every item of a
    # result set is reported with that pair's datestamp.
    batch = []
    seen = 0
    for stamp, result_set in self._listResults(metadataPrefix, set,
                                               from_, until):
        for item in result_set:
            seen += 1
            if seen <= cursor:
                # Still inside the part consumed by earlier batches
                continue
            rec = item.fetch_record(session)
            # Header(identifier, datestamp, setspec, deleted)
            batch.append((Header(str(item.id), stamp, [], None), rec, None))
            if len(batch) == batch_size:
                return batch
    return batch
def getRecord(self, metadataPrefix, identifier):
    """Return a (header, metadata, about) tuple for the record.

    metadataPrefix - identifies metadata set to retrieve the record in
    identifier - repository-unique identifier of record

    Raises CannotDisseminateFormatError if metadataPrefix is given but is
    not a key of self.protocolMap.recordNamespaces.
    Raises IdDoesNotExistError unless exactly one record matches identifier.
    Raises ConfigFileException if the database search reports
    SRWDiagnostics.Diagnostic16 for the rec.identifier query.
    Raises ValueError if the stored modification date matches neither of
    the two accepted formats.

    NOTE(review): ``session`` is not bound inside this function; it is
    presumably a module-level object - confirm.
    """
    if (metadataPrefix and
            metadataPrefix not in self.protocolMap.recordNamespaces):
        raise CannotDisseminateFormatError()

    if not self.metadataRegistry.hasWriter(metadataPrefix):
        # oaipmh needs a metadata writer for this schema; build one around
        # the transformer configured for it and register it.
        schemaId = self.protocolMap.recordNamespaces[metadataPrefix]
        txr = self.protocolMap.transformerHash.get(schemaId, None)
        mdw = Cheshire3OaiMetadataWriter(txr)
        self.metadataRegistry.registerWriter(metadataPrefix, mdw)

    # The identifier comes from the request and is placed inside a
    # double-quoted CQL term, so embedded double quotes are backslash-escaped
    # (the CQL convention) to stop them from terminating the term early.
    # NOTE(review): assumes cqlparse honours \" inside quoted terms - confirm.
    quoted_id = ('%s' % identifier).replace('"', '\\"')
    q = cqlparse('rec.identifier exact "%s"' % quoted_id)
    try:
        rs = self.db.search(session, q)
    except SRWDiagnostics.Diagnostic16:
        raise ConfigFileException(
            'Index map for rec.identifier required in protocolMap: %s'
            % self.db.get_path(session, 'protocolMap').id)

    # Zero matches and multiple matches are both reported as "does not exist"
    if len(rs) != 1:
        raise IdDoesNotExistError(
            '%s records exist for this identifier' % (len(rs)))

    r = rs[0]
    rec = r.fetch_record(session)

    # Reverse lookup of lastModificationDate: this query is never executed,
    # it is only parsed so the protocol map can resolve which index holds
    # rec.lastModificationDate.
    q = cqlparse('rec.lastModificationDate < "%s"'
                 % (datetime.datetime.utcnow()))
    pm = self.db.get_path(session, 'protocolMap')  # get CQL ProtocolMap
    idx = pm.resolveIndex(session, q)
    vector = idx.fetch_vector(session, rec)
    # NOTE(review): vector[2][0][0] is presumably the id of the first term
    # stored for this record - confirm against the index implementation.
    term = idx.fetch_termById(session, vector[2][0][0])
    try:
        datestamp = datetime.datetime.strptime(term, '%Y-%m-%dT%H:%M:%S')
    except ValueError:
        # The date may be stored with a space instead of the 'T' separator
        datestamp = datetime.datetime.strptime(term, '%Y-%m-%d %H:%M:%S')
    # Header(identifier, datestamp, setspec, deleted)
    return (Header(str(r.id), datestamp, [], None), rec, None)
def listIdentifiers(self, metadataPrefix, set=None, from_=None, until=None,
                    cursor=0, batch_size=10):
    """Return one batch of Header objects for the matching records.

    metadataPrefix - identifies the metadata format to retrieve
    set - set identifier (optional); any truthy value is rejected
    from_ - lower date bound, forwarded to self._listResults (optional)
    until - upper date bound, forwarded to self._listResults (optional)
    cursor - number of leading matches to skip
    batch_size - the batch is returned once it holds at least this many
        headers

    Raises CannotDisseminateFormatError if metadataPrefix is given but is
    not a key of self.protocolMap.recordNamespaces.
    Raises NoSetHierarchyError if a set is requested.
    """
    if (metadataPrefix and
            metadataPrefix not in self.protocolMap.recordNamespaces):
        raise CannotDisseminateFormatError()
    # Cheshire3 does not support sets
    if set:
        raise NoSetHierarchyError()

    # self._listResults yields (datestamp, resultSet) pairs; every item of a
    # result set is reported with that pair's datestamp.
    batch = []
    seen = 0
    for stamp, result_set in self._listResults(metadataPrefix, set,
                                               from_, until):
        for item in result_set:
            seen += 1
            if seen <= cursor:
                # Still inside the part consumed by earlier batches
                continue
            # Header(identifier, datestamp, setspec, deleted)
            batch.append(Header(str(item.id), stamp, [], None))
            if len(batch) >= batch_size:
                return batch
    return batch
def listRecords(self, metadataPrefix, set=None, from_=None, until=None,
                cursor=0, batch_size=10):
    """Return one batch of records.

    Each entry is a (header, metadata, about) tuple for a record matching
    the given parameters. A record whose fetch raises
    ObjectDeletedException is reported as (header flagged deleted, None,
    None) instead of aborting the listing.

    metadataPrefix
        identifies the metadata format to retrieve
    set
        set identifier (optional); any truthy value is rejected
    from_
        lower date bound, forwarded to self._listResults (optional)
    until
        upper date bound, forwarded to self._listResults (optional)
    cursor
        number of leading matches to skip
    batch_size
        the batch is returned as soon as it holds this many items

    Raises CannotDisseminateFormatError if metadataPrefix is given but is
    not a key of self.protocolMap.recordNamespaces.
    Raises NoSetHierarchyError if a set is requested.
    """
    session = self.session
    if (metadataPrefix and
            metadataPrefix not in self.protocolMap.recordNamespaces):
        raise CannotDisseminateFormatError()
    # Cheshire3 does not support sets
    if set:
        raise NoSetHierarchyError()

    registry = self.metadataRegistry
    if not registry.hasWriter(metadataPrefix):
        # oaipmh needs a metadata writer for this schema; build one around
        # the transformer configured for it (None if there is none) and
        # register it for later calls.
        schema_id = self.protocolMap.recordNamespaces[metadataPrefix]
        transformer = self.protocolMap.transformerHash.get(schema_id, None)
        registry.registerWriter(metadataPrefix,
                                Cheshire3OaiMetadataWriter(transformer))

    # self._listResults yields (datestamp, resultSet) pairs; every item of a
    # result set is reported with that pair's datestamp.
    batch = []
    seen = 0
    for stamp, result_set in self._listResults(metadataPrefix, set,
                                               from_, until):
        for item in result_set:
            seen += 1
            if seen <= cursor:
                # Still inside the part consumed by earlier batches
                continue
            # Handle non-ascii characters in identifier: decode as UTF-8,
            # then re-encode as ASCII with XML character references.
            identifier = unicode(item.id, 'utf-8').encode(
                'ascii', 'xmlcharrefreplace')
            # Header(identifier, datestamp, setspec, deleted)
            try:
                rec = item.fetch_record(session)
            except ObjectDeletedException:
                entry = (Header(identifier, stamp, [], True), None, None)
            else:
                entry = (Header(identifier, stamp, [], None), rec, None)
            batch.append(entry)
            if len(batch) == batch_size:
                return batch
    return batch
def listIdentifiers(self, metadataPrefix, set=None, from_=None, until=None,
                    cursor=0, batch_size=10):
    """Return a list of Header objects for matching records.

    Return one batch of Header objects for records which match the given
    parameters. A record whose fetch raises ObjectDeletedException gets a
    header flagged as deleted.

    metadataPrefix
        identifies the metadata format to retrieve
    set
        set identifier (optional); any truthy value is rejected
    from_
        lower date bound, forwarded to self._listResults (optional)
    until
        upper date bound, forwarded to self._listResults (optional)
    cursor
        number of leading matches to skip
    batch_size
        the batch is returned once it holds at least this many headers

    Raises CannotDisseminateFormatError if metadataPrefix is given but is
    not a key of self.protocolMap.recordNamespaces.
    Raises NoSetHierarchyError if a set is requested.
    """
    # Bind the session explicitly, as listRecords does; the record fetch
    # below otherwise refers to a name this method never defines.
    session = self.session
    if (metadataPrefix and
            metadataPrefix not in self.protocolMap.recordNamespaces):
        raise CannotDisseminateFormatError()
    # Cheshire3 does not support sets by default
    if set:
        raise NoSetHierarchyError()
    # Get list of datestamp, resultSet tuples
    tuples = self._listResults(metadataPrefix, set, from_, until)
    # Need to return iterable of header objects
    # Header(identifier, datestamp, setspec, deleted)
    # identifier: string, datestamp: datetime.datetime instance
    # setspec: list
    # deleted: boolean?
    headers = []
    i = 0
    for (datestamp, rs) in tuples:
        for r in rs:
            if i < cursor:
                # Still inside the part consumed by earlier batches
                i += 1
                continue
            # Handle non-ascii characters in identifier
            identifier = unicode(r.id, 'utf-8')
            identifier = identifier.encode('ascii', 'xmlcharrefreplace')
            # The record is fetched only to find out whether it has been
            # deleted; the fetched record itself is discarded.
            try:
                r.fetch_record(session)
            except ObjectDeletedException:
                headers.append(Header(identifier, datestamp, [], True))
            else:
                headers.append(Header(identifier, datestamp, [], None))
            i += 1
            if len(headers) >= batch_size:
                return headers
    return headers