Exemplo n.º 1
0
def ingest(fn, journal, create, update, path_list):
    """Wrapper around the import function with friendlier handling of problems.

    `fn` is called once per entry in `path_list` as
    `fn(journal, path, create=create, update=update)`.

    An exception raised while handling one path is logged and the remaining
    paths are still attempted. A KeyboardInterrupt aborts the whole run: a
    short message is printed and the process exits with status 1.
    """
    # function-scope import: `sys.exit` replaces the `exit` builtin, which is
    # injected by the `site` module for interactive use and is not guaranteed
    # to exist (e.g. when the interpreter is started with -S)
    import sys

    def attempt(path):
        "returns True if `fn` completed for `path`, False if it raised"
        try:
            results = fn(journal, path, create=create, update=update)
            LOG.debug("results of ingest", extra={'results': results})
            return True
        except KeyboardInterrupt:
            # the user asked to stop; let the outer handler deal with it
            raise
        except BaseException:
            # anything else is logged (with traceback) and does not stop the run
            LOG.exception("failed to import article")
            return False

    try:
        lmap(attempt, path_list)
    except KeyboardInterrupt:
        print('caught interrupt')
        sys.exit(1)
Exemplo n.º 2
0
def fix_broken_locs(stdout, stderr):
    """Rewrite broken '-meta' locations on XML2JSON article fragments.

    A location starting with '/opt/bot-lax-adaptor/https:/' has that prefix
    replaced with 'https://' and the fragment is saved. Fragments without a
    '-meta' key are reported on `stderr` and skipped. `stdout` is not used.
    """
    # we have a bunch of (70+) articles with paths like:
    # /opt/bot-lax-adaptor/https:/s3-external-1.amazonaws.com/elife-publishing-expanded/24063.1/7ff76878-4424-48b0-be21-96f8bcfcb55c/elife-24063-v1.xml = /tmp/unpub-article-xml/elife-24063-v1.xml
    broken_prefix = '/opt/bot-lax-adaptor/https:/'

    def repair(frag):
        "fixes the location of a single fragment, if it needs fixing"
        if '-meta' not in frag.fragment:
            stderr.write('skipping %s, no meta found' % frag)
            return
        location = frag.fragment['-meta']['location']
        if not location.startswith(broken_prefix):
            # location is not one of the broken ones, leave it alone
            return
        fixed_location = 'https://' + location[len(broken_prefix):]
        frag.fragment['-meta']['location'] = fixed_location
        frag.save()
        stderr.write('fixed: %s' % fixed_location)

    xml2json_fragments = models.ArticleFragment.objects \
        .filter(type=models.XML2JSON) \
        .defer('fragment') # prevents lockup

    utils.lmap(repair, xml2json_fragments.iterator())
Exemplo n.º 3
0
    def handle(self, *args, **options):
        """Call `self.snapshot_query` for one query id or for every Query.

        When `options['qid']` is truthy only that id is processed, otherwise
        the ids of all `models.Query` objects are used. The process exits
        with status 0 on success; any exception is logged, echoed and turned
        into exit status 1.
        """
        try:
            qid = options['qid']
            if qid:
                targets = [qid]
            else:
                all_queries = models.Query.objects.all()
                targets = all_queries.values_list('id', flat=True)

            if targets:
                # presumably `subdict` keeps just the 'upload' option - verify against its definition
                fnargs = subdict(options, ['upload'])
                snapshot = partial(self.snapshot_query, **fnargs)
                lmap(snapshot, targets)
            else:
                LOG.info("no query objects found, nothing to upload")

        except Exception as err:
            LOG.exception(err)
            self.echo(str(err))
            sys.exit(1)

        sys.exit(0)
Exemplo n.º 4
0
    def test_article_can_be_ingested_many_times_before_publication(self):
        "before an article is published it can be ingested many times"
        # three independent copies of the fixture; the first is left untouched
        cases = lmap(copy.deepcopy, [self.ajson] * 3)
        _, json2, json3 = cases

        json2['article']['title'] = 'foo'
        json3['article']['title'] = 'bar'

        # iterate through the three different cases,
        # assert each case is different from last
        prev_fragment = None
        for ajson in cases:
            av = ajson_ingestor.ingest(ajson)
            self.freshen(av)
            fragment = av.article.articlefragment_set.get(type=models.XML2JSON)
            if prev_fragment is not None:
                self.assertNotEqual(prev_fragment.fragment, fragment.fragment)
            # advance the reference so the next case is compared against this
            # one, rather than always against the very first ingest
            prev_fragment = fragment
Exemplo n.º 5
0
def article_version_history(msid, only_published=True):
    """Return the version history of the article with manuscript id `msid`.

    The result is a dict with 'received', 'accepted' and 'versions' entries,
    where 'versions' holds one `article_snippet_json` result per article
    version. With `only_published` set, versions whose `datetime_published`
    is None are left out. For article types listed in
    EXCLUDE_RECEIVED_ACCEPTED_DATES the dict is passed through `exsubdict`
    with the 'received' and 'accepted' keys.

    Raises `models.Article.DoesNotExist` when no article versions remain.
    """
    article = models.Article.objects.get(manuscript_id=msid)

    versions = article.articleversion_set.all()
    if only_published:
        versions = versions.exclude(datetime_published=None)

    if versions.count() == 0:
        # no article versions available, fail
        raise models.Article.DoesNotExist()

    history = {
        'received': date_received(article),
        'accepted': date_accepted(article),
        'versions': lmap(article_snippet_json, versions)
    }

    if article.type in EXCLUDE_RECEIVED_ACCEPTED_DATES:
        return exsubdict(history, ['received', 'accepted'])
    return history
Exemplo n.º 6
0
def relationships(msid, only_published=True):
    """Return all relationships for the given article.

    The result is the citation of each external relationship followed by a
    snippet for each internal relationship whose article could be found.
    """
    av = most_recent_article_version(msid, only_published)

    external = relation_logic.external_relationships_for_article_version(av)
    internal = relation_logic.internal_relationships_for_article_version(av)

    def snippet_or_none(art):
        "snippet of the latest version of the related article, or None"
        try:
            latest = most_recent_article_version(art.manuscript_id, only_published)
            return article_snippet_json(latest)
        except models.Article.DoesNotExist:
            # reference to an article that could not be found!
            # it is either:
            # * a stub (hasn't finished production) or
            # * unpublished (finished production but unpublished)
            # neither are error conditions
            return None

    # the internal relationships must be snippets of the latest version of
    # that article; relations that could not be resolved are dropped
    internal_snippets = lfilter(None, lmap(snippet_or_none, internal))

    # pull the citation from each external relation
    external_citations = [relation.citation for relation in external]

    return external_citations + internal_snippets
Exemplo n.º 7
0
def relate_using_citation_list(av, citation_list):
    "calls `associate(av, citation)` for each citation and returns the `lmap` result"
    associate_with_av = partial(associate, av)
    return lmap(associate_with_av, citation_list)
Exemplo n.º 8
0
def relate_using_msid_list(av, msid_list, quiet=False):
    "calls `relate_using_msid(av, msid, quiet=quiet)` for each msid and returns the `lmap` result"
    relate_to_av = partial(relate_using_msid, av, quiet=quiet)
    return lmap(relate_to_av, msid_list)