def get_or_insert(articles):
    """
    Fetch each article from the db, inserting the ones not stored yet.

    Articles are matched on their canonicalized title only
    (``utils.canonicalize(a['title'])``). When ``lookup`` finds a stored
    article with that canonical title it is used as-is; otherwise the
    article is written with ``save`` and read back by ``_id``.

    articles -- iterable of dicts, each with at least a 'title' key. Every
        dict is mutated in place: it gains 'i' (its position in the input)
        and 'canon' (its canonical title). Both keys are forwarded to
        ``save`` along with the rest of the dict.

    Returns a list of stored articles, one per distinct ``_id``, ordered by
    the position at which each article first appears in ``articles``.
    """
    found = {}       # _id -> stored article
    first_seen = []  # _ids in order of first appearance in `articles`
    for i, a in enumerate(articles):
        a['i'] = i
        a['canon'] = utils.canonicalize(a['title'])
        our_article = lookup(canon=a['canon'], mult=False)
        if our_article:
            _id = our_article['_id']
        else:
            _id = save(**a)
            our_article = lookup(_id=_id)
        # Order by position in this call rather than by an 'i' field read
        # back from the db: a pre-existing article carries the index of an
        # earlier batch, or no 'i' at all.
        if _id not in found:
            first_seen.append(_id)
        found[_id] = our_article
    return [found[_id] for _id in first_seen]
def save(url=None, title=None, authors=None, year=None, **doc):
    """
    Save an article and return its ``_id``.

    If ``lookup`` already finds an article with the same canonical title,
    nothing is written and the ``_id`` of that article is returned.

    url, title, year -- stored unchanged under the keys of the same name.
    authors -- either a list (stored as given) or a comma-separated string,
        which is split into a list of whitespace-stripped names.
    **doc -- any additional fields to store with the article. The keys
        'canon', 'title', 'authors', 'year' and 'url' are always overwritten
        with the values computed here.

    Returns the ``_id`` of the existing or newly inserted article.
    """
    canon = utils.canonicalize(title)
    # NOTE(review): get_or_insert calls lookup with mult=False; here the
    # default is used and the result is indexed like a single document.
    # Confirm lookup's default returns one article.
    possible_article = lookup(canon=canon)
    if possible_article:
        return possible_article['_id']
    # Accept any string type (byte str as well as unicode), not only exact
    # `unicode` instances, so "A, B" is always stored as ['A', 'B'].
    if isinstance(authors, basestring):
        authors = [name.strip() for name in authors.split(',')]
    doc.update({
        "canon": canon,
        "title": title,
        "authors": authors,
        "year": year,
        "url": url,
    })
    return g.db.articles.insert(doc, safe=True)