Пример #1
0
    def test_task_process_myads(self):
        """
        Exercise tasks.task_process_myads against mocked HTTP endpoints.

        Scenarios, in order:
          1. message without 'userid' -> an error is logged
          2. message without 'frequency' -> an error is logged
          3. daily run while the Solr endpoint answers 401 -> the task re-queues itself (apply_async is called)
          4. daily run with 200 responses registered -> user.last_sent is set to today's date
          5. daily run again with force=False -> an "already sent today" warning is logged
          6. weekly run after last_sent is reset to None -> user.last_sent is set to today's date again
        """
        msg = {'frequency': 'daily'}

        # can't process without a user ID
        with patch.object(tasks.logger, 'error', return_value=None) as logger:
            tasks.task_process_myads(msg)
            logger.assert_called_with(u"No user ID received for {0}".format(msg))

        msg = {'userid': 123}

        # can't process without a frequency
        with patch.object(tasks.logger, 'error', return_value=None) as logger:
            tasks.task_process_myads(msg)
            logger.assert_called_with(u"No frequency received for {0}".format(msg))

        # process a user (the user should get created during the task)
        msg = {'userid': 123, 'frequency': 'daily'}

        # mocked response of the API_VAULT_MYADS_SETUP endpoint for this user: four setup entries
        # (two of type 'query', two of type 'template'; ids 1 and 4 are daily, ids 2 and 3 are weekly)
        httpretty.register_uri(
            httpretty.GET, self.app.conf['API_VAULT_MYADS_SETUP'] % msg['userid'],
            content_type='application/json',
            status=200,
            body=json.dumps([{'id': 1,
                              'name': 'Query 1',
                              'qid': '1234567890abcdefghijklmnopqrstu1',
                              'active': True,
                              'stateful': True,
                              'frequency': 'daily',
                              'type': 'query'},
                             {'id': 2,
                              'name': 'Query 2',
                              'qid': '1234567890abcdefghijklmnopqrstu2',
                              'active': True,
                              'stateful': False,
                              'frequency': 'weekly',
                              'type': 'query'},
                             {'id': 3,
                              'name': 'Query 3',
                              'qid': '1234567890abcdefghijklmnopqrstu3',
                              'active': True,
                              'stateful': False,
                              'frequency': 'weekly',
                              'type': 'template',
                              'template': 'authors',
                              'data': {'data': 'author:Kurtz'},
                              'query': [{'q': 'author:Kurtz entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                        'sort': 'score desc, bibcode desc'}]},
                             {'id': 4,
                              'name': 'Query 4',
                              'qid': None,
                              'active': True,
                              'stateful': True,
                              'frequency': 'daily',
                              'type': 'template',
                              'template': 'arxiv',
                              'data': 'star',
                              'classes': ['astro-ph'],
                              'query': [{'q': 'bibstem:arxiv (arxiv_class:(astro-ph.*) (star)) '
                                             'entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                        'sort': 'score desc, bibcode desc'},
                                        {'q': 'bibstem:arxiv (arxiv_class:(astro-ph.*) NOT (star)) '
                                              'entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                         'sort': 'bibcode desc'}]
                              }
                             ])
        )

        # mocked response of the API_VAULT_EXECUTE_QUERY endpoint for the stored query of setup entry 1 (qid ...u1)
        httpretty.register_uri(
            httpretty.GET, self.app.conf['API_VAULT_EXECUTE_QUERY'] % ('1234567890abcdefghijklmnopqrstu1', 'bibcode,title,author_norm', 10, 'bibcode+desc'),
            content_type='application/json',
            status=200,
            body=json.dumps({'response': {'docs': [{'bibcode': '2019arXiv190800829P',
                                                      'title': ['Gravitational wave signatures from an extended ' +
                                                                 'inert doublet dark matter model'],
                                                      'author_norm': ['Paul, A', 'Banerjee, B', 'Majumdar, D'],
                                                      "identifier": ["2019arXiv190800829P", "arXiv:1908.00829"],
                                                      "year": "2019",
                                                      "bibstem": ["arXiv"]},
                                                     {'bibcode': '2019arXiv190800678L',
                                                      'title': ['Prospects for Gravitational Wave Measurement ' +
                                                                 'of ZTFJ1539+5027'],
                                                      'author_norm': ['Littenberg, T', 'Cornish, N'],
                                                      "identifier": ["2019arXiv190800678L", "arXiv:1908.00678"],
                                                      "year": "2019",
                                                      "bibstem": ["arXiv"]}],
                                           'numFound': 2,
                                           'start': 0},
                            'responseHeader': {'QTime': 5,
                                                'params': {'fl': 'bibcode,title,author_norm,identifier,year,bibstem',
                                                            'q': 'title:"gravity waves" ' +
                                                                  'entdate:[2019-08-03 TO 2019-08-04] bibstem:"arxiv"',
                                                            'rows': '2',
                                                            'start': '0',
                                                            'wt': 'json',
                                                            'x-amzn-trace-id':
                                                                'Root=1-5d3b6518-3b417bec5eee25783a4147f4'},
                                                'status': 0}})
        )

        # the Solr endpoint answers 401 (no body) for the first arXiv template query
        # NOTE(review): presumably this failure is what makes the task re-queue itself below - confirm against
        # the task implementation
        httpretty.register_uri(httpretty.GET,
                               self.app.conf['API_SOLR_QUERY_ENDPOINT'] +
                               '?q={query}&sort={sort}&fl={fields}&rows={rows}'.format(query=quote_plus('bibstem:arxiv (arxiv_class:(astro-ph.*) (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]'),
                                                                                       sort=quote_plus('score desc, bibcode desc'),
                                                                                       fields='bibcode,title,author_norm,identifier,year,bibstem',
                                                                                       rows=5),
                               content_type='application/json',
                               status=401
                               )

        # helpers are replaced with mocks returning canned values; apply_async is patched so that a re-queue
        # of the task can be detected
        with patch.object(self.app, 'get_recent_results') as get_recent_results, \
            patch.object(utils, 'get_user_email') as get_user_email, \
            patch.object(utils, 'payload_to_plain') as payload_to_plain, \
            patch.object(utils, 'payload_to_html') as payload_to_html, \
            patch.object(utils, 'send_email') as send_email, \
            patch.object(tasks.task_process_myads, 'apply_async') as rerun_task:

            get_recent_results.return_value = ['2019arXiv190800829P', '2019arXiv190800678L']
            get_user_email.return_value = '*****@*****.**'
            payload_to_plain.return_value = 'plain payload'
            payload_to_html.return_value = '<em>html payload</em>'
            send_email.return_value = 'this should be a MIMEMultipart object'

            # first run: the task is expected to schedule itself again
            tasks.task_process_myads(msg)
            self.assertTrue(rerun_task.called)

            # register 200 responses for both arXiv template queries (the "(star)" and "NOT (star)" variants)
            # NOTE: unlike the 401 registration above, these URLs are built without quote_plus and request
            # fl=bibcode,title,author_norm
            httpretty.register_uri(
                httpretty.GET, self.app.conf['API_SOLR_QUERY_ENDPOINT'] + '?q={0}&sort={1}&fl={2}&rows={3}'.
                               format('bibstem:arxiv (arxiv_class:(astro-ph.*) (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                      'score+desc,+bibcode+desc', 'bibcode,title,author_norm', 5),
                content_type='application/json',
                status=200,
                body=json.dumps({"responseHeader": {"status": 0,
                                                    "QTime": 23,
                                                    "params": {
                                                        "q": 'bibstem:arxiv (arxiv_class:(astro-ph.*) (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                                        "x-amzn-trace-id": "Root=1-5d769c6c-f96bfa49d348f03d8ecb7464",
                                                        "fl": "bibcode,title,author_norm",
                                                        "start": "0",
                                                        "sort": "score desc, bibcode desc",
                                                        "rows": "5",
                                                        "wt": "json"}},
                                 "response": {"numFound": 2712,
                                              "start": 0,
                                              "docs": [{"bibcode": "1971JVST....8..324K",
                                                        "title": ["High-Capacity Lead Tin Barrel Dome..."],
                                                        "author_norm": ["Kurtz, J"],
                                                        "identifier": ["1971JVST....8..324K"],
                                                        "year": "1971",
                                                        "bibstem": ["JVST"]},
                                                       {"bibcode": "1972ApJ...178..701K",
                                                        "title": [
                                                            "Search for Coronal Line Emission from the Cygnus Loop"],
                                                        "author_norm": ["Kurtz, D", "Vanden Bout, P", "Angel, J"],
                                                        "identifier": ["1972ApJ...178..701K"],
                                                        "year": "1972",
                                                        "bibstem": ["ApJ"]},
                                                       {"bibcode": "1973ApOpt..12..891K",
                                                        "title": ["Author's Reply to Comments on: Experimental..."],
                                                        "author_norm": ["Kurtz, R"],
                                                        "identifier": ["1973ApOpt..12..891K"],
                                                        "year": "1973",
                                                        "bibstem": ["ApOpt"]},
                                                       {"bibcode": "1973SSASJ..37..725W",
                                                        "title": ["Priming Effect of 15N-Labeled Fertilizers..."],
                                                        "author_norm": ["Westerman, R", "Kurtz, L"],
                                                        "identifier": ["1973SSASJ..37..725W"],
                                                        "year": "1973",
                                                        "bibstem": ["SSASJ"]},
                                                       {"bibcode": "1965JSpRo...2..818K",
                                                        "title": [
                                                            "Orbital tracking and decay analysis of the saturn..."],
                                                        "author_norm": ["Kurtz, H", "McNair, A", "Naumcheff, M"],
                                                        "identifier": ["1965JSpRo...2..818K"],
                                                        "year": "1965",
                                                        "bibstem": ["JSpRo"]}]}})
            )
            httpretty.register_uri(
                httpretty.GET, self.app.conf['API_SOLR_QUERY_ENDPOINT'] + '?q={0}&sort={1}&fl={2}&rows={3}'.
                               format('bibstem:arxiv (arxiv_class:(astro-ph.*) NOT (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                      'score+desc,+bibcode+desc', 'bibcode,title,author_norm', 5),
                content_type='application/json',
                status=200,
                body=json.dumps({"responseHeader": {"status": 0,
                                                    "QTime": 23,
                                                    "params": {
                                                        "q": 'bibstem:arxiv (arxiv_class:(astro-ph.*) NOT (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                                        "x-amzn-trace-id": "Root=1-5d769c6c-f96bfa49d348f03d8ecb7464",
                                                        "fl": "bibcode,title,author_norm",
                                                        "start": "0",
                                                        "sort": "score desc, bibcode desc",
                                                        "rows": "5",
                                                        "wt": "json"}},
                                 "response": {"numFound": 2712,
                                              "start": 0,
                                              "docs": [{"bibcode": "1971JVST....8..324K",
                                                        "title": ["High-Capacity Lead Tin Barrel Dome..."],
                                                        "author_norm": ["Kurtz, J"],
                                                        "identifier": ["1971JVST....8..324K"],
                                                        "year": "1971",
                                                        "bibstem": ["JVST"]},
                                                       {"bibcode": "1972ApJ...178..701K",
                                                        "title": [
                                                            "Search for Coronal Line Emission from the Cygnus Loop"],
                                                        "author_norm": ["Kurtz, D", "Vanden Bout, P", "Angel, J"],
                                                        "identifier": ["1972ApJ...178..701K"],
                                                        "year": "1972",
                                                        "bibstem": ["ApJ"]},
                                                       {"bibcode": "1973ApOpt..12..891K",
                                                        "title": ["Author's Reply to Comments on: Experimental..."],
                                                        "author_norm": ["Kurtz, R"],
                                                        "identifier": ["1973ApOpt..12..891K"],
                                                        "year": "1973",
                                                        "bibstem": ["ApOpt"]},
                                                       {"bibcode": "1973SSASJ..37..725W",
                                                        "title": ["Priming Effect of 15N-Labeled Fertilizers..."],
                                                        "author_norm": ["Westerman, R", "Kurtz, L"],
                                                        "identifier": ["1973SSASJ..37..725W"],
                                                        "year": "1973",
                                                        "bibstem": ["SSASJ"]},
                                                       {"bibcode": "1965JSpRo...2..818K",
                                                        "title": [
                                                            "Orbital tracking and decay analysis of the saturn..."],
                                                        "author_norm": ["Kurtz, H", "McNair, A", "Naumcheff, M"],
                                                        "identifier": ["1965JSpRo...2..818K"],
                                                        "year": "1965",
                                                        "bibstem": ["JSpRo"]}]}})
            )

            # second run: the user record (id=123) is expected to carry today's date in last_sent afterwards
            tasks.task_process_myads(msg)
            with self.app.session_scope() as session:
                user = session.query(AuthorInfo).filter_by(id=123).first()
                self.assertEqual(adsputils.get_date().date(), user.last_sent.date())

        msg = {'userid': 123, 'frequency': 'daily', 'force': False}

        # mocked response of the API_VAULT_EXECUTE_QUERY endpoint for the stored query of setup entry 2 (qid ...u2)
        httpretty.register_uri(
            httpretty.GET, self.app.conf['API_VAULT_EXECUTE_QUERY'] % ('1234567890abcdefghijklmnopqrstu2', 'bibcode,title,author_norm', 10, 'bibcode+desc'),
            content_type='application/json',
            status=200,
            body=json.dumps({u'response': {u'docs': [{u'bibcode': u'2019arXiv190800829P',
                                                      u'title': [u'Gravitational wave signatures from an ' +
                                                                 u'extended inert doublet dark matter model'],
                                                      u'author_norm': [u'Paul, A', u'Banerjee, B', u'Majumdar, D'],
                                                      u"identifier": [u"2019arXiv190800829P", u"arXiv:1908.00829"],
                                                      u"year": u"2019",
                                                      u"bibstem": [u"arXiv"]},
                                                     {u'bibcode': u'2019arXiv190800678L',
                                                      u'title': [u'Prospects for Gravitational Wave Measurement ' +
                                                                 u'of ZTFJ1539+5027'],
                                                      u'author_norm': [u'Littenberg, T', u'Cornish, N'],
                                                      u"identifier": [u"2019arXiv190800678L", u"arXiv:1908.00678"],
                                                      u"year": u"2019",
                                                      u"bibstem": [u"arXiv"]}],
                                           u'numFound': 2,
                                           u'start': 0},
                             u'responseHeader': {u'QTime': 5,
                                                 u'params': {u'fl': u'bibcode,title,author_norm',
                                                             u'fq': u'{!bitset}',
                                                             u'q': u'*:*',
                                                             u'rows': u'2',
                                                             u'start': u'0',
                                                             u'wt': u'json',
                                                             u'x-amzn-trace-id':
                                                                 u'Root=1-5d3b6518-3b417bec5eee25783a4147f4'},
                                                 u'status': 0}})
        )
        # mocked Solr response for the author:Kurtz query of setup entry 3 (weekly 'authors' template)
        httpretty.register_uri(
            httpretty.GET, self.app.conf['API_SOLR_QUERY_ENDPOINT']+'?q={0}&sort={1}&fl={2}&rows={3}'.
            format('author:Kurtz entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                   'score+desc,+bibcode+desc', 'bibcode,title,author_norm', 5),
            content_type='application/json',
            status=200,
            body=json.dumps({"responseHeader": {"status": 0,
                                                "QTime": 23,
                                                "params": {"q": 'author:Kurtz entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                                           "x-amzn-trace-id": "Root=1-5d769c6c-f96bfa49d348f03d8ecb7464",
                                                           "fl": "bibcode,title,author_norm",
                                                           "start": "0",
                                                           "sort": "score desc, bibcode desc",
                                                           "rows": "5",
                                                           "wt": "json"}},
                             "response": {"numFound": 2712,
                                          "start": 0,
                                          "docs": [{"bibcode": "1971JVST....8..324K",
                                                    "title": ["High-Capacity Lead Tin Barrel Dome..."],
                                                    "author_norm": ["Kurtz, J"],
                                                    "identifier": ["1971JVST....8..324K"],
                                                    "year": "1971",
                                                    "bibstem": ["JVST"]},
                                                   {"bibcode": "1972ApJ...178..701K",
                                                    "title": ["Search for Coronal Line Emission from the Cygnus Loop"],
                                                    "author_norm": ["Kurtz, D", "Vanden Bout, P", "Angel, J"],
                                                    "identifier": ["1972ApJ...178..701K"],
                                                    "year": "1972",
                                                    "bibstem": ["ApJ"]},
                                                   {"bibcode": "1973ApOpt..12..891K",
                                                    "title": ["Author's Reply to Comments on: Experimental..."],
                                                    "author_norm":["Kurtz, R"],
                                                    "identifier": ["1973ApOpt..12..891K"],
                                                    "year": "1973",
                                                    "bibstem": ["ApOpt"]},
                                                   {"bibcode": "1973SSASJ..37..725W",
                                                    "title": ["Priming Effect of 15N-Labeled Fertilizers..."],
                                                    "author_norm": ["Westerman, R","Kurtz, L"],
                                                    "identifier": ["1973SSASJ..37..725W"],
                                                    "year": "1973",
                                                    "bibstem": ["SSASJ"]},
                                                   {"bibcode": "1965JSpRo...2..818K",
                                                    "title": ["Orbital tracking and decay analysis of the saturn..."],
                                                    "author_norm":["Kurtz, H", "McNair, A", "Naumcheff, M"],
                                                    "identifier": ["1965JSpRo...2..818K"],
                                                    "year": "1965",
                                                    "bibstem": ["JSpRo"]}]}})
        )

        # same helper mocks as above, but apply_async is not patched in this section
        with patch.object(self.app, 'get_recent_results') as get_recent_results, \
            patch.object(utils, 'get_user_email') as get_user_email, \
            patch.object(utils, 'payload_to_plain') as payload_to_plain, \
            patch.object(utils, 'payload_to_html') as payload_to_html, \
            patch.object(utils, 'send_email') as send_email:

            get_recent_results.return_value = ['2019arXiv190800829P', '2019arXiv190800678L']
            get_user_email.return_value = '*****@*****.**'
            payload_to_plain.return_value = 'plain payload'
            payload_to_html.return_value = '<em>html payload</em>'
            send_email.return_value = 'this should be a MIMEMultipart object'

            # already ran today, tried to run again without force=True
            with patch.object(tasks.logger, 'warning', return_value=None) as logger:
                tasks.task_process_myads(msg)
                logger.assert_called_with(u"Email for user {0} already sent today".format(msg['userid']))

            msg = {'userid': 123, 'frequency': 'weekly'}

            # reset user
            with self.app.session_scope() as session:
                user = session.query(AuthorInfo).filter_by(id=123).first()
                user.last_sent = None
                session.add(user)
                session.commit()

            # verify in a fresh session that the reset was persisted
            with self.app.session_scope() as session:
                user = session.query(AuthorInfo).filter_by(id=123).first()
                self.assertIsNone(user.last_sent)

            # weekly run: last_sent is expected to be stamped with today's date again
            tasks.task_process_myads(msg)

            with self.app.session_scope() as session:
                user = session.query(AuthorInfo).filter_by(id=123).first()
                self.assertEqual(adsputils.get_date().date(), user.last_sent.date())
Пример #2
0
def process_myads(since=None, user_ids=None, user_emails=None, test_send_to=None, admin_email=None, force=False,
                  frequency='daily', test_bibcode=None, **kwargs):
    """
    Processes myADS mailings

    When user_ids or user_emails is supplied, only those users are processed: the task is called directly
    (not queued via .delay) with force=True, and the function returns without touching the stored
    last-processed timestamp. user_ids takes precedence over user_emails. Otherwise one task is queued per user
    returned by app.get_users() and the last-processed timestamp for the given frequency is updated.

    :param since: fetch users via app.get_users() since this date; if empty or blank, the stored timestamp of the
    last processing run for this frequency is used (falling back to 1971-01-01T12:00:00Z if none is stored)
    :param user_ids: users to process claims for, else all users - list (given as adsws IDs)
    :param user_emails: users to process claims for, else all users - list (given as email addresses)
    :param test_send_to: for testing; process a given user ID but send the output to this email address (only
    passed along when user_ids or user_emails is supplied)
    :param admin_email: if provided, an email is sent to this address when processing starts (does not trigger
    for processing for individual users)
    :param force: if True, will force processing of emails even if sent for a given user already that day (only
    applies when processing all users; individually supplied users are always forced)
    :param frequency: basestring; 'daily' or 'weekly' (any value other than 'daily' uses the weekly timestamp key)
    :param test_bibcode: bibcode to query to test if Solr searcher has been updated
    :param kwargs: accepted for call compatibility; not used
    :return: no return
    """
    if user_ids:
        for u in user_ids:
            tasks.task_process_myads({'userid': u, 'frequency': frequency, 'force': True,
                                      'test_send_to': test_send_to, 'test_bibcode': test_bibcode})

        logger.info('Done (just the supplied user IDs)')
        return

    if user_emails:
        for u in user_emails:
            # resolve the email address to a user ID through the API
            r = app.client.get(config.get('API_ADSWS_USER_EMAIL') % u,
                               headers={'Accept': 'application/json',
                                        'Authorization': 'Bearer {0}'.format(config.get('API_TOKEN'))}
                               )
            if r.status_code != 200:
                logger.warning('Error getting user ID with email {0} from the API. Processing aborted for this user'.format(u))
                continue

            tasks.task_process_myads({'userid': r.json()['id'], 'frequency': frequency, 'force': True,
                                      'test_send_to': test_send_to, 'test_bibcode': test_bibcode})

        logger.info('Done (just the supplied user emails)')
        return

    logging.captureWarnings(True)

    if admin_email:
        started = 'Processing started for {}'.format(get_date())
        utils.send_email(email_addr=admin_email,
                         payload_plain=started,
                         payload_html=started,
                         subject='myADS {0} processing has started'.format(frequency))

    # key under which the timestamp of the last run for this frequency is stored
    kv_key = 'last.process.daily' if frequency == 'daily' else 'last.process.weekly'

    # if since keyword not provided, since is set to timestamp of last processing
    if not since or (isinstance(since, basestring) and since.strip() == ""):
        with app.session_scope() as session:
            kv = session.query(KeyValue).filter_by(key=kv_key).first()
            since = kv.value if kv is not None else '1971-01-01T12:00:00Z'

    users_since_date = get_date(since)
    logger.info('Processing {0} myADS queries since: {1}'.format(frequency, users_since_date.isoformat()))

    # taken before the tasks are submitted, so the stored timestamp marks the start of this run
    last_process_date = get_date()
    all_users = app.get_users(users_since_date.isoformat())

    for user in all_users:
        payload = {'userid': user, 'frequency': frequency, 'force': force, 'test_bibcode': test_bibcode}
        try:
            tasks.task_process_myads.delay(payload)
        except Exception as e:  # potential backpressure (we are too fast)
            # Exception (not a bare except) so that KeyboardInterrupt/SystemExit still stop the run
            logger.warning('Conn problem submitting myADS task for user {0}, retrying: {1}'.format(user, e))
            time.sleep(2)
            print('Conn problem, retrying... {0}'.format(user))
            # single retry; a second failure propagates to the caller
            tasks.task_process_myads.delay(payload)

    # update last processed timestamp
    with app.session_scope() as session:
        kv = session.query(KeyValue).filter_by(key=kv_key).first()
        if kv is None:
            session.add(KeyValue(key=kv_key, value=last_process_date.isoformat()))
        else:
            kv.value = last_process_date.isoformat()
        session.commit()

    done_msg = 'Done submitting {0} myADS processing tasks for {1} users.'.format(frequency, len(all_users))
    print(done_msg)
    logger.info(done_msg)