""" import json import couchdb from pubrefdb import configuration def undump(db, infile): data = json.load(infile) for id, doc in data.iteritems(): try: del doc['_rev'] except KeyError: pass try: olddoc = db[id] except couchdb.http.ResourceNotFound: pass else: doc['_rev'] = olddoc['_rev'] db.save(doc) print id, doc['entitytype'] if __name__ == '__main__': infile = open('dump.json') undump(configuration.get_db(), infile) infile.close()
""" PubRefDb: Web application for a database of publications.

Dump all documents having a defined entitytype to a file.
"""

import json

from pubrefdb import configuration


def dump(db, outfile):
    """Write all documents that have an 'entitytype' item to outfile as JSON.

    Iterates over every id in the database, fetches the document, and
    collects a plain-dict copy of each document containing the key
    'entitytype', keyed by the document's '_id'. The collected mapping
    is written with a single ``json.dump`` call after the loop.

    One progress line "<id> <entitytype>" is printed per dumped document.

    Args:
        db: Database object that is iterable over document ids and
            supports ``db[id]`` lookup.
        outfile: Writable file object receiving the JSON text.
    """
    data = {}
    for docid in db:
        doc = db[docid]
        # 'in' replaces dict.has_key(), which no longer exists in Python 3.
        if 'entitytype' in doc:
            data[doc['_id']] = dict(doc)
            # Single-argument form prints the same text on Python 2 and 3.
            print('%s %s' % (docid, doc['entitytype']))
    json.dump(data, outfile)


if __name__ == '__main__':
    import os
    dirpath = os.path.expanduser('~/dumps/pubrefdb')
    filepath = os.path.join(dirpath, "dump_%s.json" % configuration.get_date())
    # Text mode: json.dump() writes str, which a binary-mode file rejects
    # on Python 3. The context manager closes the file even on error.
    with open(filepath, 'w') as outfile:
        dump(configuration.get_db(), outfile)
""" view = db.view('publication/incomplete', include_docs=True) for item in view: pmid = item.value if not pmid: continue if log: print 'Checking PMID', pmid article = pubmed.Article(pmid) if article.pmid: patch_publication(db, item.doc, article, log=log) time.sleep(delay) def patch_publication(db, doc, article, log): with PublicationSaver(db, doc=doc): doc['type'] = article.type doc['published'] = article.published for key in ['volume', 'issue', 'pages']: if not doc['journal'].get(key): doc['journal'][key] = article.journal.get(key) if log: print 'Updated', article.pmid, article.title return doc if __name__ == '__main__': import os import sys db = configuration.get_db() patch(db, log=os.isatty(sys.stdin.fileno()))
if name.lower() not in names: pis[i] = None pis = [pi for pi in pis if pi is not None] return [(pi.get('normalized_name', pi['name']), [a.strip() for a in pi['affiliation'].split(',')]) for pi in pis] def add_publication(db, pmid): """Add the publication to the database if not already in it. Skip if the PMID has been excluded. Set the tag 'SciLifeLab' if marked such in the affiliation. """ if len(db.view('publication/xref')[['pubmed', pmid]]) > 0: return if len(db.view('publication/excluded')[['pubmed', pmid]]) > 0: return article = pubmed.Article(pmid) if not article.pmid: return affiliation = article.affiliation or '' affiliation = affiliation.lower() for key in ['science for life laboratory', 'scilifelab']: if key in affiliation: article.tags.append('SciLifeLab') break with PublicationSaver(db, doc=article.get_data()) as doc: pass return doc if __name__ == '__main__': import sys fetch(configuration.get_db(), pinames=sys.argv[1:])