Пример #1
0
"""

import json

import couchdb

from pubrefdb import configuration


def undump(db, infile):
    """Load documents from a JSON dump file into the database.

    The file must contain a single JSON object mapping document id to
    document. Each document is saved to 'db'; if a document with the same
    id already exists there, it is overwritten by reusing its current
    revision. The id and entitytype of each saved document are printed.

    db: database object supporting item lookup by id and 'save(doc)'.
    infile: readable file-like object containing the JSON dump.

    Raises KeyError if a document lacks the 'entitytype' item.
    """
    data = json.load(infile)
    # 'items()' rather than 'iteritems()': works on both Python 2 and 3.
    for doc_id, doc in data.items():
        # The revision stored in the dump is stale; never send it.
        doc.pop('_rev', None)
        try:
            olddoc = db[doc_id]
        except couchdb.http.ResourceNotFound:
            # No such document in the database yet; save it as new.
            pass
        else:
            # Reuse the current revision so the save overwrites
            # the existing document.
            doc['_rev'] = olddoc['_rev']
        db.save(doc)
        # Single pre-formatted argument prints identically whether
        # 'print' is a statement (Python 2) or a function (Python 3).
        print('%s %s' % (doc_id, doc['entitytype']))


if __name__ == '__main__':
    # Load 'dump.json' from the current working directory into the
    # configured database. The 'with' block closes the file even if
    # the load fails part-way through.
    with open('dump.json') as infile:
        undump(configuration.get_db(), infile)
Пример #2
0
""" PubRefDb: Web application for a database of publications.

Dump all documents having a defined entitytype to a file.
"""

import json

from pubrefdb import configuration


def dump(db, outfile):
    """Write all documents having an 'entitytype' item to a JSON file.

    Every document in 'db' is fetched; those containing the key
    'entitytype' are collected into one JSON object mapping the
    document's '_id' to the document, which is written to 'outfile'.
    The id and entitytype of each dumped document are printed.

    db: database object; iterating it yields document ids, and
        indexing by id returns the document.
    outfile: writable file-like object receiving the JSON text.
    """
    data = dict()
    for doc_id in db:
        doc = db[doc_id]
        # 'in' rather than 'has_key()', which no longer exists in Python 3.
        if 'entitytype' in doc:
            data[doc['_id']] = dict(doc)
            # Single pre-formatted argument prints identically whether
            # 'print' is a statement (Python 2) or a function (Python 3).
            print('%s %s' % (doc_id, doc['entitytype']))
    json.dump(data, outfile)


if __name__ == '__main__':
    import os
    # The dump file is named after the date given by the configuration
    # and placed in the user's '~/dumps/pubrefdb' directory.
    dirpath = os.path.expanduser('~/dumps/pubrefdb')
    filepath = os.path.join(dirpath, "dump_%s.json" % configuration.get_date())
    # Text mode: 'json.dump' writes text, which a binary-mode file rejects
    # on Python 3. The default JSON output is pure ASCII without raw
    # newline characters, so the bytes written are the same as before.
    # The 'with' block closes (and flushes) the file even if the dump fails.
    with open(filepath, 'w') as outfile:
        dump(configuration.get_db(), outfile)
Пример #3
0
    """
    view = db.view('publication/incomplete', include_docs=True)
    for item in view:
        pmid = item.value
        if not pmid: continue
        if log:
            print 'Checking PMID', pmid
        article = pubmed.Article(pmid)
        if article.pmid:
            patch_publication(db, item.doc, article, log=log)
            time.sleep(delay)


def patch_publication(db, doc, article, log):
    """Update the publication document with data from the article.

    The 'type' and 'published' items of the document are overwritten
    by the values from the article. The journal items 'volume', 'issue'
    and 'pages' are set from the article's journal only when the
    document's own value is missing or empty.

    The modifications are made within a PublicationSaver context for
    the document (presumably this saves the document on exit; confirm
    against the PublicationSaver definition).

    db: the database the document belongs to.
    doc: the publication document; must contain a 'journal' item.
    article: the article to take values from; provides 'type',
        'published', 'journal', 'pmid' and 'title'.
    log: if true, print a line naming the updated article.

    Returns the modified document.
    """
    with PublicationSaver(db, doc=doc):
        doc['type'] = article.type
        doc['published'] = article.published
        journal = doc['journal']
        for key in ['volume', 'issue', 'pages']:
            # Keep a value already present in the document.
            if not journal.get(key):
                journal[key] = article.journal.get(key)
        if log:
            # Single pre-formatted argument prints identically whether
            # 'print' is a statement (Python 2) or a function (Python 3).
            print('Updated %s %s' % (article.pmid, article.title))
    return doc


if __name__ == '__main__':
    import os
    import sys
    db = configuration.get_db()
    # Log progress only when standard input is attached to a terminal.
    stdin_is_tty = os.isatty(sys.stdin.fileno())
    patch(db, log=stdin_is_tty)
Пример #4
0
            if name.lower() not in names:
                pis[i] = None
        pis = [pi for pi in pis if pi is not None]
    return [(pi.get('normalized_name', pi['name']),
             [a.strip() for a in pi['affiliation'].split(',')])
            for pi in pis]

def add_publication(db, pmid):
    """Add the publication with the given PMID to the database.

    Nothing is done, and None is returned, if the PMID is already
    present in the database, if it has been excluded, or if no
    article could be obtained for it.
    The tag 'SciLifeLab' is added to the article if its affiliation
    mentions it. Returns the saved document otherwise.
    """
    # Already in the database, or explicitly excluded: skip.
    for viewname in ('publication/xref', 'publication/excluded'):
        if len(db.view(viewname)[['pubmed', pmid]]) > 0:
            return

    article = pubmed.Article(pmid)
    if not article.pmid:
        return

    # Case-insensitive match against the affiliation text, if any.
    affiliation = (article.affiliation or '').lower()
    markers = ('science for life laboratory', 'scilifelab')
    if any(marker in affiliation for marker in markers):
        article.tags.append('SciLifeLab')

    # Entering and leaving the context is all that is needed here;
    # no further modification of the document is made.
    with PublicationSaver(db, doc=article.get_data()) as doc:
        pass
    return doc


if __name__ == '__main__':
    import sys
    # All command-line arguments are passed on as the 'pinames' argument.
    requested_names = sys.argv[1:]
    fetch(configuration.get_db(), pinames=requested_names)