def fetch_opensky_pids(self):
    """
    Run a Solr search feed and return the PIDs of the matching records.

    The query selects records that have a DOI (mods_identifier_doi_mt:*)
    and a keyDate of 2014-01-01T00:00:00Z or later.  A previous note on
    this method recorded that the query should fetch 5184 records
    (VERIFIED).

    The number of PIDs in the feed is printed before they are returned.
    """
    # Adjacent literals are joined at compile time into a single query string.
    solr_query = (
        'mods_identifier_doi_mt:*'
        ' AND keyDate:[2014-01-01T00:00:00Z TO *]'
    )
    solr_url = CONFIG.get("fedora", "SERVER") + CONFIG.get("fedora", "SOLR_PATH")

    feed = feeds.SolrSearchFeed(params={'q': solr_query}, baseUrl=solr_url)
    pids = feed.pids
    print('{} in feed'.format(len(pids)))
    return pids
def getFeed():
    """
    Build the Solr search feed of records that carry a funding note.

    Queries Solr for 'mods_note_funding_s:*', stores the number of PIDs on
    the feed as ``numFound``, prints that count together with
    ``feed.size()``, and sorts ``feed.pids`` in place.

    Returns the SolrSearchFeed instance.
    """
    solr_url = CONFIG.get("fedora", "SERVER") + CONFIG.get("fedora", "SOLR_PATH")
    feed = feeds.SolrSearchFeed(
        params={'q': 'mods_note_funding_s:*'},
        baseUrl=solr_url
    )

    # numFound is assigned before feed.size() is called, as in the
    # original ordering.
    pid_count = len(feed.pids)
    feed.numFound = pid_count
    print('feed: %d (%d)' % (pid_count, feed.size()))

    # Sort in place so the feed itself holds the ordered PID list.
    feed.pids.sort()
    return feed
def update_old_funder_records():
    """
    Run the notes backfill over records that still carry old-style
    corporate funder info (mods_name_corporate_funder_s:*).

    Background: the Kuali backfill updated the records that also had Kuali
    IDs, but there are 452 that still have funder info in this form:

        <name type="corporate">
            <namePart>National Science Foundation (NSF)</namePart>
            <role>
                <roleTerm type="text" authority="marcrelator">funder</roleTerm>
            </role>
        </name>
        <note type="funding">National Science Foundation (NSF): 1852977</note>

    The goal is to remove the corporate funder info and try to normalize
    the award_ids with Kuali-verified versions.  If ids can't be normalized
    that way, a 'displayLabel' = 'Legacy funding data' attribute is added.

    This function only selects the records, prints their PIDs and calls
    ``NotesMODS.do_back_fill([])`` on each one; the edits described above
    are presumably performed inside NotesMODS -- verify there.
    """
    # Copied onto the NotesMODS class below; presumably 0 disables
    # writing changes back -- confirm against NotesMODS.
    dowrites = 0

    solr_url = CONFIG.get("fedora", "SERVER") + CONFIG.get("fedora", "SOLR_PATH")
    feed = feeds.SolrSearchFeed(
        params={'q': 'mods_name_corporate_funder_s:*'},
        baseUrl=solr_url
    )
    print('feed has {}'.format(len(feed.pids)))

    NotesMODS.dowrites = dowrites
    for pid in feed.pids:
        print(pid)
        record = NotesMODS(get_datastream(pid, 'MODS'), pid)
        record.do_back_fill([])