def pubmed_adapter(search=None, id=None):
    '''
    Sample queries:
    #curl "http://localhost:8880/pubmed?"
    curl "http://localhost:8880/pubmed?search=stem+cells"
    curl "http://localhost:8880/pubmed?id=19358275"
    '''
    #FIXME: How do we handle no search or id param? Just serve up the latest entries? Or error as below?
    #assert_(not(search and id), msg="You must specify either the 'search' or the 'id' query parameter.")
    if search:
        #search = first_item(search)
        #reldate: only search for last N days
        #query = urllib.urlencode({'db': NCBI_DB, 'term': query, 'reldate': '60', 'datetype': 'edat', 'retmax': DEFAULT_MAX_RESULTS, 'usehistory': 'y'})
        query = urllib.urlencode({'term': search, 'db': NCBI_DB, 'datetype': 'edat', 'retmax': DEFAULT_MAX_RESULTS, 'usehistory': 'y'})
        search_url = NCBI_SEARCH_PATTERN + query
        logger.debug("Term search URL: " + search_url)
        doc = bindery.parse(search_url, standalone=True)
        search_terms = search
        ids = ( unicode(i) for i in doc.eSearchResult.IdList.Id )
        ids = ','.join(ids)
        self_link = '/pubmed?search=' + search
    else:
        ids = first_item(id)
        #fulltext = fulltext[0] if fulltext else u'no'
        #if fulltext == 'yes':
        search_terms = ids
        self_link = '/pubmed?id=' + ids
    #Common to both branches: fetch the full article records for the collected IDs
    query = urllib.urlencode({'db': NCBI_DB, 'id': ids, 'retmode': 'xml'})
    search_url = NCBI_ARTICLE_ACCESS_PATTERN + query
    logger.debug("ID search URL: " + search_url)
    alt_link = search_url
    doc = bindery.parse(search_url, standalone=True, model=PUBMED_MODEL)
    #doc = bindery.parse(open('/Users/uche/tmp/efetch.fcgi.html'), standalone=True, model=PUBMED_MODEL)
    metadata, first_id = metadata_dict(generate_metadata(doc))
    return atom_results(doc, metadata, self_link, alt_link, search_terms)
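# A self-contained sketch of the two-step NCBI E-utilities flow the adapter
# above relies on: esearch returns the matching PubMed IDs, efetch then
# retrieves the article records for those IDs. The base URLs below are the
# public E-utilities endpoints; NCBI_SEARCH_PATTERN and
# NCBI_ARTICLE_ACCESS_PATTERN above are assumed to wrap these same endpoints.
import urllib

ESEARCH_BASE = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?'
EFETCH_BASE = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'

def pubmed_urls(term, retmax=20):
    '''Return the esearch URL for a term and a builder for the efetch URL over IDs.'''
    search_url = ESEARCH_BASE + urllib.urlencode(
        {'db': 'pubmed', 'term': term, 'retmax': retmax, 'usehistory': 'y'})
    fetch_url = lambda ids: EFETCH_BASE + urllib.urlencode(
        {'db': 'pubmed', 'id': ','.join(ids), 'retmode': 'xml'})
    return search_url, fetch_url

#Example: search_url, fetch_url = pubmed_urls('stem cells')
#GET search_url, collect the <Id> values from eSearchResult, then GET fetch_url(ids).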
def list_records(self, set="", resumption_token = ""): ''' List records. Use either the resumption token or set id. ''' if resumption_token: params = {'verb' : 'ListRecords', 'resumptionToken': resumption_token} else: params = {'verb' : 'ListRecords', 'metadataPrefix': 'oai_dc', 'set': set} qstr = urllib.urlencode(params) url = self.root + '?' + qstr self.logger.debug('OAI request URL: {0}'.format(url)) start_t = time.time() resp, content = self.h.request(url) retrieved_t = time.time() self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t)) doc = bindery.parse(url, model=OAI_LISTRECORDS_MODEL) records, first_id = metadata_dict(generate_metadata(doc), nesteddict=False) for id_, props in records: for k, v in props.iteritems(): props[k] = [ U(item) for item in v ] if (doc.OAI_PMH.ListRecords is not None) and (doc.OAI_PMH.ListRecords.resumptionToken is not None): resumption_token = U(doc.OAI_PMH.ListRecords.resumptionToken) else: resumption_token = '' return {'records' : records, 'resumption_token' : resumption_token}
def factory(rest_uri, moin_link=None, opener=None):
    opener = opener or urllib2.build_opener()
    logger.debug('rest_uri: ' + rest_uri)
    req = urllib2.Request(rest_uri, headers={'Accept': DOCBOOK_IMT})
    resp = opener.open(req)
    doc = bindery.parse(resp, standalone=True, model=MOIN_DOCBOOK_MODEL)
    original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
    #self.original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
    #amara.xml_print(self.content_cache)
    metadata, first_id = metadata_dict(generate_metadata(doc))
    metadata = metadata[first_id]
    akara_type = U(metadata[u'ak-type'])
    logger.debug('Type: ' + akara_type)
    try:
        #Older Moin CMS resource types are implemented by registration to the global node.NODES
        cls = node.NODES[akara_type]
    except KeyError:
        #Newer Moin CMS resource types are implemented by discovery of a URL,
        #to which a POST request executes the desired action
        return node.ENDPOINTS and (rest_uri, akara_type, node.ENDPOINTS[akara_type], doc, metadata, original_wiki_base)
    else:
        instance = cls(rest_uri, moin_link, opener, cache=(doc, metadata, original_wiki_base))
        return instance
def search(self, term):
    qstr = urllib.urlencode({'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': term})
    url = DSPACE_OAI_ENDPOINT + '?' + qstr
    logger.debug('DSpace URL: ' + str(url))
    #keywords = [ (k.strip(), JOVE_TAG) for k in unicode(row.xml_select(u'string(.//*[@class="keywords"])')).split(',') ]
    doc = bindery.parse(url, model=OAI_MODEL)
    #print >> sys.stderr, list(generate_metadata(doc))
    resources, first_id = metadata_dict(generate_metadata(doc), nesteddict=False)
    record = doc.OAI_PMH
    resource = resources[first_id]
    return resource
def atomize_oai_record(endpoint=None, id=None):
    '''
    endpoint - the OAI request URL, e.g. http://dspace.mit.edu/oai/request
    id - the article ID, e.g. oai:dspace.mit.edu:1721.1/5451

    Sample request:
    curl "http://localhost:8880/akara.oai.atom?endpoint=http://dspace.mit.edu/oai/request&id=oai:dspace.mit.edu:1721.1/5451"
    '''
    if endpoint is None:
        raise ValueError('endpoint required')
    if id is None:
        raise ValueError('id required')
    qstr = urllib.urlencode({'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': id})
    url = endpoint + '?' + qstr
    doc = bindery.parse(url, model=OAI_MODEL)
    resources = metadata_dict(generate_metadata(doc))
    #print resources
    f = feed(ATOM_ENVELOPE)
    #f = feed(ATOM_ENVELOPE, title=resources['title'], id=resources['id'])
    #f.source.feed.xml_append(E((ATOM_NAMESPACE, u'link'), {u'rel': u'self', u'type': u'application/atom+xml', u'href': self_link.decode('utf-8')}))
    #f.source.feed.xml_append(E((ATOM_NAMESPACE, u'link'), {u'rel': u'search', u'type': u'application/opensearchdescription+xml', u'href': u'http://kds-kci.zepheira.com/sciencedirect.discovery'}))
    #f.source.feed.xml_append(E((ATOM_NAMESPACE, u'link'), {u'rel': u'alternate', u'type': u'text/xml', u'href': alt_link.decode('utf-8')}))
    #f.source.feed.xml_append(E((OPENSEARCH_NAMESPACE, u'Query'), {u'role': u'request', u'searchTerms': search_terms.decode('utf-8')}))
    #maxarticles = DEFAULT_MAX_RESULTS
    maxarticles = 3
    for record in islice(doc.OAI_PMH, 0, maxarticles):
        resource = resources[id]
        print resource
        authors = [ (a, None, None) for a in resource[u'creator'] ]
        links = [
            (unicode(resource['handle']), u'alternate'),
        ]
        #categories = [ (unicode(k), SD_NS+u'authorKeyword') for k in authkw(article) ]
        #elements = [
        #    E((SD_NS, u'sd:journal-cover'), unicode(article.journalCover).strip() if hasattr(article, 'journalCover') else DEFAULT_ICON),
        #    E((SD_NS, u'sd:journal-name'), unicode(article.journalName)),
        #]
        f.append(
            id,
            unicode(resource['title'][0]),
            updated=unicode(resource['date'][0]),
            summary=unicode(resource['description'][0]),
            authors=authors,
            links=links,
            #categories=categories,
            #elements=elements,
        )
    return f.source.xml_encode('xml-indent')
def __init__(self, rest_uri, opener):
    self.rest_uri = rest_uri
    self.opener = opener
    #from node.factory
    req = urllib2.Request(rest_uri, headers={'Accept': DOCBOOK_IMT})
    print >> sys.stderr, 'rest_uri: ', rest_uri
    with closing(opener.open(req)) as resp:
        doc = bindery.parse(resp, standalone=True, model=MOIN_DOCBOOK_MODEL)
        original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
        #self.original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
    #amara.xml_print(self.content_cache)
    metadata = metadata_dict(generate_metadata(doc))
    self.cache = (doc, metadata, original_wiki_base)
    return
def get_record(self, id):
    params = {'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': id}
    qstr = urllib.urlencode(params)
    url = self.root + '?' + qstr
    self.logger.debug('OAI request URL: {0}'.format(url))
    start_t = time.time()
    resp, content = self.h.request(url)
    retrieved_t = time.time()
    self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))
    doc = bindery.parse(url, model=OAI_GETRECORD_MODEL)
    record, rid = metadata_dict(generate_metadata(doc), nesteddict=False)
    for id_, props in (record if isinstance(record, list) else [record]):
        for k, v in props.iteritems():
            props[k] = [ U(item) for item in v ]
    return {'record': record}
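# Hypothetical usage, assuming `client` is an instance of the surrounding
# class with self.root set to an OAI-PMH endpoint such as
# http://dspace.mit.edu/oai/request. Note the argument is the full OAI
# identifier, not a bare handle.
def fetch_one(client, oai_id):
    return client.get_record(oai_id)['record']

#e.g. fetch_one(client, 'oai:dspace.mit.edu:1721.1/5451')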
def list_records(self, set="", resumption_token="", metadataPrefix=""): ''' List records. Use either the resumption token or set id. ''' error = None if resumption_token: params = {'verb' : 'ListRecords', 'resumptionToken': resumption_token} else: params = {'verb' : 'ListRecords', 'metadataPrefix': metadataPrefix, 'set': set} qstr = urllib.urlencode(params) url = self.root + '?' + qstr self.logger.debug('OAI request URL: {0}'.format(url)) start_t = time.time() resp, content = self.h.request(url) retrieved_t = time.time() self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t)) resumption_token = '' if metadataPrefix in ["mods", "marc", "untl"]: xml_content = XML_PARSE(content) records = [] error = getprop(xml_content, "OAI-PMH/error/#text", True) if error is None: for record in xml_content["OAI-PMH"]["ListRecords"]["record"]: id = record["header"]["identifier"] if "null" not in id: records.append((id, record)) if "resumptionToken" in xml_content["OAI-PMH"]["ListRecords"]: resumption_token = xml_content["OAI-PMH"]["ListRecords"]["resumptionToken"] if isinstance(resumption_token, dict): resumption_token = resumption_token.get("#text", "") else: doc = bindery.parse(url, model=LISTRECORDS_MODELS[metadataPrefix]) records, first_id = metadata_dict(generate_metadata(doc), nesteddict=False) for id_, props in records: for k, v in props.iteritems(): props[k] = [ U(item) for item in v ] if (doc.OAI_PMH.ListRecords is not None) and (doc.OAI_PMH.ListRecords.resumptionToken is not None): resumption_token = U(doc.OAI_PMH.ListRecords.resumptionToken) return {'records': records, 'resumption_token': resumption_token, 'error': error}
def list_records(self, set):
    '''
    List the records in the given set.
    '''
    #e.g. http://dspace.mit.edu/oai/request?verb=ListRecords&metadataPrefix=oai_dc&set=hdl_1721.1_18193
    qstr = urllib.urlencode({'verb': 'ListRecords', 'metadataPrefix': 'oai_dc', 'set': set})
    url = self.root + '?' + qstr
    self.logger.debug('OAI request URL: {0}'.format(url))
    start_t = time.time()
    resp, content = self.h.request(url)
    retrieved_t = time.time()
    self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))
    doc = bindery.parse(url, model=OAI_LISTRECORDS_MODEL)
    #print >> sys.stderr, list(generate_metadata(doc))
    records, first_id = metadata_dict(generate_metadata(doc), nesteddict=False)
    for id_, props in records:
        for k, v in props.iteritems():
            props[k] = [ U(item) for item in v ]
    return records
def factory(rest_uri, relative, outputdir):
    req = urllib2.Request(rest_uri, headers={'Accept': DOCBOOK_IMT})
    resp = urllib2.urlopen(req)
    doc = bindery.parse(resp, standalone=True, model=MOIN_DOCBOOK_MODEL)
    original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
    #self.original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
    #amara.xml_print(self.content_cache)
    output = os.path.join(outputdir, relative)
    parent_dir = os.path.split(output)[0]
    try:
        os.makedirs(parent_dir)
    except OSError:
        pass
    metadata, first_id = metadata_dict(generate_metadata(doc))
    metadata = metadata[first_id]
    akara_type = first_item(first_item(metadata[u'ak-type']))
    #import sys; print >> sys.stderr, 'GRIPPO', akara_type.xml_value
    cls = node.NODES[akara_type.xml_value]
    instance = cls(rest_uri, relative, outputdir, cache=(doc, metadata, original_wiki_base))
    return instance
def list_records(self, set="", resumption_token="", metadataPrefix=""): ''' List records. Use either the resumption token or set id. ''' if resumption_token: params = {'verb' : 'ListRecords', 'resumptionToken': resumption_token} else: params = {'verb' : 'ListRecords', 'metadataPrefix': metadataPrefix, 'set': set} qstr = urllib.urlencode(params) url = self.root + '?' + qstr self.logger.debug('OAI request URL: {0}'.format(url)) start_t = time.time() resp, content = self.h.request(url) retrieved_t = time.time() self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t)) if metadataPrefix == "mods" or metadataPrefix == "marc": xml_content = XML_PARSE(content) records = [] for record in xml_content["OAI-PMH"]["ListRecords"]["record"]: id = record["header"]["identifier"] if "null" not in id: records.append((id, record)) if "resumptionToken" in xml_content["OAI-PMH"]["ListRecords"]: resumption_token = xml_content["OAI-PMH"]["ListRecords"]["resumptionToken"] else: resumption_token = '' else: doc = bindery.parse(url, model=LISTRECORDS_MODELS[metadataPrefix]) records, first_id = metadata_dict(generate_metadata(doc), nesteddict=False) for id_, props in records: for k, v in props.iteritems(): props[k] = [ U(item) for item in v ] if (doc.OAI_PMH.ListRecords is not None) and (doc.OAI_PMH.ListRecords.resumptionToken is not None): resumption_token = U(doc.OAI_PMH.ListRecords.resumptionToken) else: resumption_token = '' return {'records' : records, 'resumption_token' : resumption_token}
def test_metadata_extraction(self):
    """Test metadata extraction"""
    model = schematron_model(MODEL_A)
    doc = bindery.parse(INSTANCE_A_1, model=model)
    metadata = generate_metadata(doc)
    EXPECTED_MD = [
        (u'ep', u'place', u'Hailey,ID'),
        (u'tse', u'place', u'Stamford,CT'),
        (u'tse', u'opus', u'r2e0e3e5'),
        (u'r2e0e3e5', u'title', u'The Wasteland'),
        (u'tse', u'tag', u'old possum'),
        (u'tse', u'tag', u'poet'),
        (u'lh', u'place', u'Harlem,NY'),
        (u'lh', u'tag', u'poet'),
        (u'co', u'place', u'Idoto,Anambra'),
        (u'co', u'opus', u'r2e0e7e5'),
        (u'r2e0e7e5', u'title', u"Heaven's Gate"),
        (u'co', u'tag', u'biafra'),
        (u'co', u'tag', u'poet'),
    ]
    #print list(metadata)
    meta_list = normalize_generated_ids(list(metadata))
    self.assertEqual(meta_list, normalize_generated_ids(EXPECTED_MD))
def dspace_adapter(search=None, id=None):
    '''
    Sample queries:
    curl "http://*****:*****@class="result_table"]//*[@class="article_title"]'):
    '''
    for li in islice(doc.xml_select(u'//*[@id="'+RESULTS_DIV+'"]//*[@class="artifact-description"]/..'), 0, maxarticles):
        row = li.xml_parent.xml_parent
        title = li.xml_select(u'.//*[@class="artifact-title"]')[0]
        rel_id = title.a.href.partition(u'/handle/')[2]
        dspace_id = DSPACE_ID_BASE + rel_id
        alt_link = DSPACE_ARTICLE_BASE + u'1721.1/7488'
        #Do not quote. DSpace doesn't like that
        #alt_link = DSPACE_ARTICLE_BASE + urllib.quote(u'1721.1/7488', '')
        title = unicode(title)
        summary = unicode(row.xml_select(u'string(.//*[@class="summary"])'))
        updated = unicode(row.xml_select(u'string(.//*[@class="date"])')).strip().partition(u'Published: ')[2]
        #updated = time.strptime(updated, "%m/%d/%Y %H:%M:%S") #2/11/2008 2:20:00 AM
        authors = [ (name.strip(), None, None) for name in unicode(row.xml_select(u'string(.//*[@class="author"]//b)')).split(';') ]
        #Retrieve the DSpace page
        qstr = urllib.urlencode({'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': dspace_id})
        url = DSPACE_OAI_ENDPOINT + '?' + qstr
        print >> sys.stderr, url
        #keywords = [ (k.strip(), JOVE_TAG) for k in unicode(row.xml_select(u'string(.//*[@class="keywords"])')).split(',') ]
        doc = bindery.parse(url, model=OAI_MODEL)
        #print >> sys.stderr, list(generate_metadata(doc))
        resources, first_id = metadata_dict(generate_metadata(doc))
        record = doc.OAI_PMH
        resource = resources[first_id]
        authors = [ (a, None, None) for a in resource[u'creator'] ]
        links = [
            (DSPACE_ARTICLE_BASE + rel_id, u'alternate'),
            (u'dspace?id=' + dspace_id, u'self'),
        ]
        elements = [
            E((ATOM_NAMESPACE, u'content'), {u'src': alt_link}),
        ]
        f.append(
            dspace_id,
            U(resource['title']),
            updated=U(resource['date']),
            summary=U(resource['description']),
            authors=authors,
            links=links,
            #categories=categories,
            elements=elements,
        )
    #FIXME: indent
    return f.xml_encode()