def list_records(self, set="", resumption_token=""):
    '''
    Fetch one page of an OAI-PMH ListRecords response in oai_dc form.

    set - set spec to list; only sent when no resumption token is given
    resumption_token - token from an earlier page; when non-empty it is the
        only argument sent besides the verb

    Returns a dict with two keys:
      'records' - the (id, properties) pairs from metadata_dict, with every
          property value replaced by a list of its items passed through U
      'resumption_token' - U() of the response's resumptionToken, or ''
          when the response has no ListRecords or no resumptionToken
    '''
    params = {'verb': 'ListRecords'}
    if resumption_token:
        params['resumptionToken'] = resumption_token
    else:
        params['metadataPrefix'] = 'oai_dc'
        params['set'] = set
    url = self.root + '?' + urllib.urlencode(params)
    self.logger.debug('OAI request URL: {0}'.format(url))

    # Time the HTTP round trip for the debug log
    started = time.time()
    resp, content = self.h.request(url)
    finished = time.time()
    self.logger.debug('Retrieved in {0}s'.format(finished - started))

    # NOTE(review): the parser is handed the URL, not the content fetched above
    doc = bindery.parse(url, model=OAI_LISTRECORDS_MODEL)
    records, first_id = metadata_dict(generate_metadata(doc), nesteddict=False)
    for _record_id, properties in records:
        for name, values in properties.iteritems():
            properties[name] = [U(value) for value in values]

    next_token = ''
    page = doc.OAI_PMH.ListRecords
    if page is not None:
        token_node = page.resumptionToken
        if token_node is not None:
            next_token = U(token_node)
    return {'records': records, 'resumption_token': next_token}
def factory(rest_uri, moin_link=None, opener=None):
    '''
    Fetch the DocBook form of a Moin resource and build the matching node.

    rest_uri - URL of the resource; requested with Accept set to DOCBOOK_IMT
    moin_link - handed through to the node class constructor
    opener - urllib2 opener to use; a default one is built when this is falsy

    Returns an instance of the class found in node.NODES under the resource's
    ak-type. When the type is not in node.NODES, returns node.ENDPOINTS itself
    if that is falsy, otherwise the tuple
    (rest_uri, akara_type, endpoint, doc, metadata, original_wiki_base).
    '''
    if not opener:
        opener = urllib2.build_opener()
    logger.debug('rest_uri: ' + rest_uri)

    request = urllib2.Request(rest_uri, headers={'Accept': DOCBOOK_IMT})
    response = opener.open(request)
    doc = bindery.parse(response, standalone=True, model=MOIN_DOCBOOK_MODEL)
    response_headers = dict(response.info())
    original_wiki_base = response_headers[ORIG_BASE_HEADER]

    all_metadata, first_id = metadata_dict(generate_metadata(doc))
    metadata = all_metadata[first_id]
    akara_type = U(metadata[u'ak-type'])
    logger.debug('Type: ' + akara_type)

    try:
        # Older Moin CMS resource types are implemented by registration to
        # the global node.NODES
        node_class = node.NODES[akara_type]
    except KeyError:
        # Newer Moin CMS resource types are implemented by discovery of a URL,
        # to which a POST request executes the desired action
        endpoints = node.ENDPOINTS
        if not endpoints:
            return endpoints
        return (rest_uri, akara_type, endpoints[akara_type], doc, metadata,
                original_wiki_base)
    return node_class(rest_uri, moin_link, opener,
                      cache=(doc, metadata, original_wiki_base))
def pubmed_adapter(search=None, id=None):
    '''
    Sample queries:
        #curl "http://localhost:8880/pubmed?"
        curl "http://localhost:8880/pubmed?search=stem+cells"
        curl "http://localhost:8880/pubmed?id=19358275"

    Look up PubMed articles either by search term or by ID and hand the
    parsed article document to atom_results.

    search - term to search for; when truthy it takes precedence over id, and
        the IDs found by the search are what get fetched
    id - article ID string, used as given for the 'id' argument of the fetch

    Returns the result of atom_results(doc, metadata, self_link, alt_link,
    search_terms).

    Raises ValueError when neither search nor id is supplied.
    '''
    if search:
        # First resolve the term to a list of article IDs
        query = urllib.urlencode({'term': search, 'db' : NCBI_DB, 'datetype': 'edat', 'retmax': DEFAULT_MAX_RESULTS, 'usehistory': 'y'})
        search_url = NCBI_SEARCH_PATTERN + query
        logger.debug("Term search URL: " + search_url)
        doc = bindery.parse(search_url, standalone=True)
        search_terms = search
        ids = ','.join(unicode(i) for i in doc.eSearchResult.IdList.Id)
        self_link = '/pubmed?search='+search
    elif id:
        # The only assignment to ids in this branch used to be commented out,
        # so every id-only request failed with UnboundLocalError.
        ids = id
        search_terms = ids
        self_link = '/pubmed?id='+ids
    else:
        # Previously fell through to the same UnboundLocalError; fail with a
        # message that tells the caller what is missing.
        raise ValueError("You must specify the 'search' or 'id' query parameter")

    # Fetch the full article records for the selected IDs
    query = urllib.urlencode({'db' : NCBI_DB, 'id': ids, 'retmode': 'xml'})
    search_url = NCBI_ARTICLE_ACCESS_PATTERN + query
    logger.debug("ID search URL: " + search_url)
    alt_link = search_url
    doc = bindery.parse(search_url, standalone=True, model=PUBMED_MODEL)
    metadata, first_id = metadata_dict(generate_metadata(doc))
    return atom_results(doc, metadata, self_link, alt_link, search_terms)
def search(self, term):
    '''
    Issue an OAI-PMH GetRecord request to DSPACE_OAI_ENDPOINT and parse it.

    NOTE(review): `term` is never used, and `dspace_id` is not bound anywhere
    in this function, so it must come from an outer scope - confirm.
    Nothing is returned; the parsed pieces are only bound to locals.
    '''
    params = {'verb' : 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': dspace_id}
    url = DSPACE_OAI_ENDPOINT + '?' + urllib.urlencode(params)
    logger.debug('DSpace URL: ' + str(url))

    doc = bindery.parse(url, model=OAI_MODEL)
    resources, first_id = metadata_dict(generate_metadata(doc), nesteddict=False)
    # Both lookups are kept even though their results go unused, so any
    # failure they raise still surfaces here
    record = doc.OAI_PMH
    resource = resources[first_id]
def get_record(self, id):
    '''
    Fetch a single record with the OAI-PMH GetRecord verb, in oai_dc form.

    id - record identifier, sent as the 'identifier' request argument

    Returns {'record': record}, where record is the first value returned by
    metadata_dict for the parsed response. Each (id, properties) pair in it
    has every property value replaced by a list of its items passed through U.
    '''
    params = {'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': id}
    qstr = urllib.urlencode(params)
    url = self.root + '?' + qstr
    self.logger.debug('OAI request URL: {0}'.format(url))

    # Time the HTTP round trip for the debug log
    start_t = time.time()
    resp, content = self.h.request(url)
    retrieved_t = time.time()
    # This was `'...',format(...)`: the comma passed the builtin format()
    # result as a logging argument, so the {0} placeholder was never filled
    # and logging reported a formatting error instead of the elapsed time.
    self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))

    # NOTE(review): the parser is handed the URL, not the content fetched above
    doc = bindery.parse(url, model=OAI_GETRECORD_MODEL)
    record, rid = metadata_dict(generate_metadata(doc), nesteddict=False)
    # A non-list result is wrapped so the same loop handles both shapes
    for id_, props in (record if isinstance(record, list) else [record]):
        for k, v in props.iteritems():
            props[k] = [U(item) for item in v]
    return {'record': record}
def list_records(self, set="", resumption_token="", metadataPrefix=""):
    '''
    List records. Use either the resumption token or set id.

    set - set spec to list; only sent when no resumption token is given
    resumption_token - token from an earlier page; when non-empty it is the
        only argument sent besides the verb
    metadataPrefix - metadata format; also selects the parsing path:
        "mods", "marc" and "untl" go through XML_PARSE, anything else through
        bindery with LISTRECORDS_MODELS[metadataPrefix]

    Returns a dict with three keys:
      'records' - list of (identifier, record) pairs
      'resumption_token' - token for the next page, '' when there is none
      'error' - text of the OAI-PMH error element on the XML_PARSE path,
          otherwise None
    '''
    error = None
    if resumption_token:
        params = {'verb' : 'ListRecords', 'resumptionToken': resumption_token}
    else:
        params = {'verb' : 'ListRecords', 'metadataPrefix': metadataPrefix, 'set': set}
    qstr = urllib.urlencode(params)
    url = self.root + '?' + qstr
    self.logger.debug('OAI request URL: {0}'.format(url))

    # Time the HTTP round trip for the debug log
    start_t = time.time()
    resp, content = self.h.request(url)
    retrieved_t = time.time()
    self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))

    resumption_token = ''
    if metadataPrefix in ["mods", "marc", "untl"]:
        xml_content = XML_PARSE(content)
        records = []
        error = getprop(xml_content, "OAI-PMH/error/#text", True)
        if error is None:
            list_records_node = xml_content["OAI-PMH"]["ListRecords"]
            raw_records = list_records_node["record"]
            # XML_PARSE appears to give a bare dict, not a one-item list, for
            # a lone <record> (same convention as the '#text' handling below
            # - TODO confirm). Iterating that dict would walk its keys.
            if isinstance(raw_records, dict):
                raw_records = [raw_records]
            for record in raw_records:
                identifier = record["header"]["identifier"]
                # Records whose identifier contains "null" are skipped
                if "null" not in identifier:
                    records.append((identifier, record))
            if "resumptionToken" in list_records_node:
                resumption_token = list_records_node["resumptionToken"]
                # The token node is a dict when it carries more than text
                if isinstance(resumption_token, dict):
                    resumption_token = resumption_token.get("#text", "")
    else:
        # NOTE(review): the parser is handed the URL, not the content fetched above
        doc = bindery.parse(url, model=LISTRECORDS_MODELS[metadataPrefix])
        records, first_id = metadata_dict(generate_metadata(doc), nesteddict=False)
        for id_, props in records:
            for k, v in props.iteritems():
                props[k] = [U(item) for item in v]
        if (doc.OAI_PMH.ListRecords is not None) and (doc.OAI_PMH.ListRecords.resumptionToken is not None):
            resumption_token = U(doc.OAI_PMH.ListRecords.resumptionToken)
    return {'records': records, 'resumption_token': resumption_token, 'error': error}
def list_records(self, set):
    '''
    Retrieve the oai_dc records of one set with the OAI-PMH ListRecords verb.

    set - set spec, e.g. hdl_1721.1_18193 as in
        http://dspace.mit.edu/oai/request?verb=ListRecords&metadataPrefix=oai_dc&set=hdl_1721.1_18193

    Returns the (id, properties) pairs from metadata_dict, with every property
    value replaced by a list of its items passed through U.
    '''
    params = {'verb' : 'ListRecords', 'metadataPrefix': 'oai_dc', 'set': set}
    url = self.root + '?' + urllib.urlencode(params)
    self.logger.debug('OAI request URL: {0}'.format(url))

    # Time the HTTP round trip for the debug log
    started = time.time()
    resp, content = self.h.request(url)
    finished = time.time()
    self.logger.debug('Retrieved in {0}s'.format(finished - started))

    # NOTE(review): the parser is handed the URL, not the content fetched above
    doc = bindery.parse(url, model=OAI_LISTRECORDS_MODEL)
    records, first_id = metadata_dict(generate_metadata(doc), nesteddict=False)
    for _record_id, properties in records:
        for name, values in properties.iteritems():
            properties[name] = [U(value) for value in values]
    return records
def list_records(self, set="", resumption_token="", metadataPrefix=""):
    '''
    List records. Use either the resumption token or set id.

    set - set spec to list; only sent when no resumption token is given
    resumption_token - token from an earlier page; when non-empty it is the
        only argument sent besides the verb
    metadataPrefix - metadata format; also selects the parsing path:
        "mods" and "marc" go through XML_PARSE, anything else through
        bindery with LISTRECORDS_MODELS[metadataPrefix]

    Returns a dict with two keys:
      'records' - list of (identifier, record) pairs
      'resumption_token' - token for the next page, '' when there is none
    '''
    if resumption_token:
        params = {'verb' : 'ListRecords', 'resumptionToken': resumption_token}
    else:
        params = {'verb' : 'ListRecords', 'metadataPrefix': metadataPrefix, 'set': set}
    qstr = urllib.urlencode(params)
    url = self.root + '?' + qstr
    self.logger.debug('OAI request URL: {0}'.format(url))

    # Time the HTTP round trip for the debug log
    start_t = time.time()
    resp, content = self.h.request(url)
    retrieved_t = time.time()
    self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))

    resumption_token = ''
    if metadataPrefix in ("mods", "marc"):
        xml_content = XML_PARSE(content)
        list_records_node = xml_content["OAI-PMH"]["ListRecords"]
        raw_records = list_records_node["record"]
        # Presumably XML_PARSE gives a bare dict, not a one-item list, for a
        # lone <record> - TODO confirm. Iterating that dict would walk its
        # keys rather than records.
        if isinstance(raw_records, dict):
            raw_records = [raw_records]
        records = []
        for record in raw_records:
            identifier = record["header"]["identifier"]
            # Records whose identifier contains "null" are skipped
            if "null" not in identifier:
                records.append((identifier, record))
        if "resumptionToken" in list_records_node:
            resumption_token = list_records_node["resumptionToken"]
            # When the parsed token node is a dict rather than plain text,
            # only its '#text' entry is the token. Returning the dict itself
            # would corrupt the next request's resumptionToken argument.
            if isinstance(resumption_token, dict):
                resumption_token = resumption_token.get("#text", "")
    else:
        # NOTE(review): the parser is handed the URL, not the content fetched above
        doc = bindery.parse(url, model=LISTRECORDS_MODELS[metadataPrefix])
        records, first_id = metadata_dict(generate_metadata(doc), nesteddict=False)
        for id_, props in records:
            for k, v in props.iteritems():
                props[k] = [U(item) for item in v]
        if (doc.OAI_PMH.ListRecords is not None) and (doc.OAI_PMH.ListRecords.resumptionToken is not None):
            resumption_token = U(doc.OAI_PMH.ListRecords.resumptionToken)
    return {'records': records, 'resumption_token': resumption_token}