def list_set_records_save(setSpec):
    """Collect the record headers of one OAI set together with their metadata.

    Args:
        setSpec: The setSpec of the set whose records should be collected.

    Returns:
        list: The dicts returned by ``list_identifiers``, each extended with
        a ``'setSpec'`` key (the requested set) and a ``'dc'`` key (whatever
        ``get_record_metadata`` returns for the record's identifier).
        Empty when the repository reports ``NoRecordsMatch``.
    """
    try:
        # list_identifiers() is declared as list_identifiers(setSpec=None)
        # and looks up the repository URL itself, so only the setSpec is
        # passed; passing the URL as well raised a TypeError.
        set_rec_idents = list_identifiers(setSpec)
    except oaiexceptions.NoRecordsMatch:
        # A set without records is not an error for this function.
        return []

    # The repository URL is the same for every record; fetch it once.
    repository_url = admin.get_repository_url()

    recs = []
    for ident in set_rec_idents:
        ident['setSpec'] = setSpec
        # NOTE(review): get_record_metadata is not visible from here; the
        # (url, identifier) call shape is kept as-is -- confirm it matches.
        ident['dc'] = get_record_metadata(repository_url,
                                          ident.get('identifier'))
        recs.append(ident)
    return recs
def get(self):
    """Harvest every set of the configured repository into per-set JSON files.

    For each set returned by ``harv.list_sets`` a file named
    ``<setSpec>.json`` (``unknown.json`` when the set has no setSpec) is
    looked up under ``<instance_path>/data/harvested/<repository label>``.
    Files that already exist are left untouched; missing ones are created
    from ``harv.list_set_records``.

    Returns:
        The set list as returned by ``harv.list_sets``.

    Any unexpected error while processing a set is passed to
    ``harv.abort(400, ...)``.
    """
    setlist = harv.list_sets(admin.get_repository_url())
    datadir = os.path.join(current_app.instance_path, 'data', 'harvested',
                           admin.get_repository_label())
    print(datadir)

    for s in setlist:
        # Errors are handled per set: a set that reports NoRecordsMatch must
        # not prevent the remaining sets from being harvested.
        try:
            setSpec = s.get('setSpec') or 'unknown'
            filepath = glam_io.clean_filepath(
                os.path.join(datadir, ''.join((setSpec, '.json'))))
            if os.path.isfile(filepath):
                # Already harvested earlier; keep the existing file.
                print('Testing existence of:', filepath)
            else:
                print('preparing new file:', filepath)
                set_records = harv.list_set_records(setSpec)
                glam_io.write_json(filepath, set_records)
        except harv.oaiexceptions.NoRecordsMatch:
            continue
        except Exception as e:
            harv.abort(400, e)

    return setlist
def list_set_records(setSpec):
    """Fetch the ``oai_dc`` records of one set from the configured repository.

    Args:
        setSpec: The setSpec passed as ``set`` to Sickle's ``ListRecords``.

    Returns:
        list[dict]: One dict per record with the keys ``'identifier'``,
        ``'datestamp'``, ``'setSpec'`` (taken from ``rec.header.setSpecs``)
        and ``'dc'`` (``rec.metadata``).

    Harvesting is best-effort: if listing or iterating the records fails,
    the records collected so far are returned. The failure is logged rather
    than silently discarded.
    """
    import logging

    set_recs = []
    sickle = Sickle(admin.get_repository_url())
    try:
        for rec in sickle.ListRecords(metadataPrefix='oai_dc', set=setSpec):
            header = rec.header
            set_recs.append({
                "identifier": header.identifier,
                "datestamp": header.datestamp,
                "setSpec": header.setSpecs,
                "dc": rec.metadata,
            })
    except Exception:
        # Keep the original best-effort contract (never raise to the
        # caller), but leave a trace so failed harvests can be diagnosed.
        logging.getLogger(__name__).warning(
            "ListRecords failed for set %r; returning %d record(s) "
            "collected so far",
            setSpec, len(set_recs), exc_info=True)
    return set_recs
def list_sets(repository_url=None, max_sets=500):
    """List the sets offered by an OAI-PMH repository.

    Args:
        repository_url: Base URL of the repository. Falls back to
            ``admin.get_repository_url()`` when empty or ``None``.
        max_sets: Upper bound on the number of sets read from the
            repository (default 500, the previously hard-coded limit).
            Pass ``None`` to read all sets.

    Returns:
        list[dict]: One ``{'setSpec': ..., 'setName': ...}`` dict per set.

    Any error raised while iterating the sets is passed to
    ``abort(400, ...)``.
    """
    from itertools import islice

    repository_url = repository_url or admin.get_repository_url()
    sickle = Sickle(repository_url)
    listsets = sickle.ListSets()

    setlist = []
    try:
        # islice stops after max_sets items without pulling an extra one
        # from the iterator, matching the old fixed-count loop.
        for s in islice(listsets, max_sets):
            setlist.append({
                'setSpec': s.setSpec,
                'setName': s.setName,
            })
    except Exception as e:
        abort(400, e)
    return setlist
def list_identifiers(setSpec=None):
    """List the record headers of one set as plain dicts.

    Args:
        setSpec: The setSpec handed to ``get_identifiers_in_set``.

    Returns:
        list[dict]: One ``{'identifier': ..., 'datestamp': ...}`` dict per
        header yielded by ``get_identifiers_in_set``.

    Errors raised while iterating the headers are passed to
    ``abort(400, ...)``. Errors raised by ``get_identifiers_in_set`` itself
    are not intercepted and propagate to the caller.
    """
    # get_identifiers_in_set() takes only the setSpec and resolves the
    # repository URL on its own. The call deliberately stays outside the
    # try block so its exceptions keep reaching the caller unchanged.
    identifiers = get_identifiers_in_set(setSpec)

    identifiers_list = []
    try:
        for h in identifiers:
            identifiers_list.append({
                'identifier': h.identifier,
                'datestamp': h.datestamp,
            })
    except Exception as e:
        abort(400, e)
    return identifiers_list
def get_identifiers_in_set(setSpec):
    """Return Sickle's ``ListIdentifiers`` result for one set.

    The request is made against ``admin.get_repository_url()`` with
    ``metadataPrefix='oai_dc'`` and ``set=setSpec``.
    """
    repository = Sickle(admin.get_repository_url())
    return repository.ListIdentifiers(metadataPrefix='oai_dc', set=setSpec)