def test_search_get_range_rows(self):
        page = Pages(qid='wxyz', content='hi')
        page2 = Pages(qid='wxyz2', content='hi 2')
        self.app.db.session.add(page)
        self.app.db.session.add(page2)
        self.app.db.session.commit()

        begin = get_date(dt.datetime.utcnow()) - dt.timedelta(hours=1)
        end = get_date(dt.datetime.utcnow()) + dt.timedelta(hours=1)

        # url_for translates to:
        # '/search?begin=2018-12-26T18%3A27%3A02.367394%2B00%3A00&rows=1&end=2018-12-26T20%3A27%3A02.367412%2B00%3A00'
        r = self.client.get(
            url_for('turbobee_app.search',
                    begin=begin.isoformat(),
                    end=end.isoformat(),
                    rows=1))

        first_page = r.json[0]
        created = get_date(first_page['created'])

        self.assertLess(begin, created)
        self.assertGreater(end, created)
        self.assertEqual(r.status_code, 200)
Example #2
def get_access_token():
    '''Exchange 'code' for 'access_token' data'''
    payload = request.args.to_dict()
    if 'code' not in payload:
        raise Exception('Parameter code is missing')
    headers = {'Accept': 'application/json'}
    data = {
      'client_id': current_app.config['ORCID_CLIENT_ID'],
      'client_secret': current_app.config['ORCID_CLIENT_SECRET'],
      'code': payload['code'],
      'grant_type': 'authorization_code'
    }
    #print current_app.config['ORCID_OAUTH_ENDPOINT'], data, headers

    # do not use a connection pool; always establish a new connection to the remote ORCID server.
    # We were having issues with dropped connections mid-stream, and since this request is not
    # idempotent we can't retry.
    try:
        r = requests.post(current_app.config['ORCID_OAUTH_ENDPOINT'], data=data, headers=headers,
                          timeout=current_app.config.get('CONNECTION_TIMEOUT', 30))
    except (ConnectionError, ConnectTimeout, ReadTimeout) as e:
        logging.error('For ORCID code {}, there was a connection error with the ORCID API'.format(payload['code']))
        return 'There was a connection error with the ORCID API', 502

    if r.status_code != 200:
        logging.error('For ORCID code {}, there was an error getting the token from the ORCID API.'.
                      format(payload['code']))
        return r.text, r.status_code

    # update/create user account
    data = r.json()
    if 'orcid' in data:
        with current_app.session_scope() as session:
            u = session.query(User).filter_by(orcid_id=data['orcid']).options(load_only(User.orcid_id)).first()
            p = session.query(Profile).filter_by(orcid_id=data['orcid']).options(load_only(Profile.orcid_id)).first()
            if not u:
                u = User(orcid_id=data['orcid'], created=adsmutils.get_date())
            if not p:
                p = Profile(orcid_id=data['orcid'], created=adsmutils.get_date())
            u.updated = adsmutils.get_date()
            p.updated = adsmutils.get_date()
            u.access_token = data['access_token']
            # save the user
            session.begin_nested()
            try:
                session.add(u)
                session.add(p)
                session.commit()
            except exc.IntegrityError as e:
                session.rollback()
            # per PEP-0249 a transaction is always in progress
            session.commit()

    return r.text, r.status_code
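# Illustrative sketch (not from the source): the rough shape of the ORCID token
# response that get_access_token() consumes. Only 'orcid' and 'access_token'
# are read above; the other keys and all values are made-up placeholders.
EXAMPLE_ORCID_TOKEN_RESPONSE = {
    'access_token': 'e9d48784-0000-0000-0000-placeholder',  # stored on User.access_token
    'orcid': '0000-0002-1825-0097',                         # used to look up/create User and Profile
    'token_type': 'bearer',                                 # assumed typical OAuth 2.0 field
    'scope': '/orcid-works/update',                         # assumed typical OAuth 2.0 field
}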
Example #3
def update_profile_local(orcid_id, data=None, force=False):
    """Update local db with ORCID profile"""

    data = json.loads(data)

    with current_app.session_scope() as session:
        profile = session.query(Profile).filter_by(orcid_id=orcid_id).first()
        if not profile:
            logging.error('ORCID profile {} does not exist; creating'.format(orcid_id))
            profile = Profile(orcid_id=orcid_id, created=adsmutils.get_date())
            force = True
        # data assumed to come from ORCID API /works endpoint
        if data:
            # convert milliseconds since epoch to seconds since epoch
            last_modified = data['activities-summary']['last-modified-date']['value']
            last_modified /= 1000.
            if force or (profile.updated < datetime.utcfromtimestamp(last_modified).replace(tzinfo=pytz.utc)):
                works = data['activities-summary']['works']['group']
                new_recs = {}
                update_recs = {}
                orcid_recs = []
                try:
                    current_recs = profile.bibcode.keys()
                except Exception:
                    current_recs = []
                for work in works:
                    try:
                        id0, rec = find_record(work)
                    except Exception:
                        continue
                    if id0 not in current_recs:
                        new_recs.update(rec)
                    else:
                        # if bibcode already in the profile, keep its status
                        rec[id0]['status'] = profile.bibcode[id0]['status']
                        update_recs.update(rec)
                    orcid_recs.append(id0)
                profile.add_records(new_recs)
                profile.add_records(update_recs)
                # remove records from the profile that aren't in the ORCID set
                remove_recs = list(set(current_recs)-set(orcid_recs))
                profile.remove_bibcodes(remove_recs)

        profile.updated = adsmutils.get_date()
        # save the user
        session.begin_nested()
        try:
            session.add(profile)
            session.commit()
        except exc.IntegrityError as e:
            session.rollback()
            logging.warning('ORCID profile database error - updated bibcodes for {} were not saved.'.format(orcid_id))
Example #4
def search():

    keys = request.args.keys()

    # default is 50, max is 100
    rows = min(current_app.config.get('MAX_RETURNED', 100),
               int(request.args.get('rows') or 50))
    with current_app.session_scope() as session:

        if 'begin' in keys and 'end' in keys:
            begin = get_date(request.args['begin'])
            end = get_date(request.args['end'])
            query = session.query(Pages).filter(
                Pages.created.between(begin, end))
        elif 'begin' in keys:  # search for all records after begin
            begin = get_date(request.args['begin'])
            query = session.query(Pages).filter(Pages.created >= begin)
        elif 'end' in keys:  # search for all records before end
            end = get_date(request.args['end'])
            query = session.query(Pages).filter(Pages.created <= end)
        elif 'at' in keys:  # search for all records created at specific timestamp
            at = get_date(request.args['at'])
            query = session.query(Pages).filter(Pages.created == at)
        elif 'null' in keys:
            query = session.query(Pages).filter(Pages.created == None)
        else:
            return jsonify({'msg': 'Invalid parameters %s' % keys}), 505

        if 'last_id' in keys:
            query = query.filter(Pages.id > request.args['last_id'])

        query = query.order_by(Pages.updated.asc()) \
            .limit(rows)

        if 'fields' in keys:  # load only some fields
            allowed_fields = [
                'qid', 'created', 'updated', 'expires', 'lifetime',
                'content_type', 'content'
            ]
            fields = request.args.getlist('fields') or allowed_fields
            fields_to_load = list(set(fields) & set(allowed_fields))
            query = query.options(load_only(*fields_to_load))

        try:
            pages = query.all()
            # it is possible that toJSON() will eagerly load all fields (defeating load_only() above)
            result = [page.toJSON() for page in pages]
            return jsonify(result)
        except Exception as e:
            current_app.logger.error('Failed request: %s (error=%s)', keys, e)
            return jsonify({'msg': str(e)}), 500
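# Minimal usage sketch (not from the source), mirroring the test in Example #1:
# exercising the search endpoint through the Flask test client. The endpoint
# name 'turbobee_app.search' comes from that test; the parameter values and
# the `client` argument are illustrative.
def _example_search_calls(client):
    # pages created inside a time window, up to 20 rows
    r = client.get(url_for('turbobee_app.search',
                           begin='2018-12-26T18:27:02+00:00',
                           end='2018-12-26T20:27:02+00:00',
                           rows=20))
    # keyset pagination plus field selection: only ids greater than the last
    # seen one, returning just the 'qid' column
    r = client.get(url_for('turbobee_app.search',
                           begin='2018-12-26T18:27:02+00:00',
                           last_id=42,
                           fields='qid'))
    return r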
Example #5
    def get_date(self, date=None):
        """
        :return: UTC date
        """

        self.logger.info('Example of logging within the app.')
        return get_date(date).isoformat()
Example #6
    def test_bootstrap(self):
        expires = datetime.datetime.fromordinal(
            adsmutils.get_date().toordinal() + 5)
        params = {
            'expires': expires.isoformat(),
            'ratelimit': 0.001,
            'create_new': False
        }
        r = authenticated_user.get('/accounts/bootstrap', params=params)
        a = r.json()

        r = anonymous_user.get('/accounts/bootstrap', params=params)
        b = r.json()

        # currently fails: it returns 'anonymous' for the
        # authenticated user if the user in the db has an empty 'is_active' column;
        # also, the ratelimits must allow for more clients (i.e. not be fully
        # consumed)
        assert a['username'] != b['username']
        assert a['access_token'] != b['access_token']
        assert a['username'] == 'tester@ads'
        assert b['username'] == 'anonymous@ads'

        # repeating the bootstrap request should give you the
        # same access token
        for x in range(5):
            r = anonymous_user.get(
                '/accounts/bootstrap',
                params=params,
                headers={'Authorization': 'Bearer %s' % b['access_token']})
            assert r.json()['access_token'] == b['access_token']

        for x in range(5):
            r = authenticated_user.get('/accounts/bootstrap', params=params)
            assert r.json()['access_token'] == a['access_token']
    def test_get_date(self):
        """Check we always work with UTC dates"""

        d = adsmutils.get_date()
        self.assertTrue(d.tzname() == u'UTC')

        d1 = adsmutils.get_date(u'2009-09-04T01:56:35.450686Z')
        self.assertTrue(d1.tzname() == u'UTC')
        self.assertEqual(d1.isoformat(), u'2009-09-04T01:56:35.450686+00:00')

        d2 = adsmutils.get_date(u'2009-09-03T20:56:35.450686-05:00')
        self.assertTrue(d2.tzname() == u'UTC')
        self.assertEqual(d2.isoformat(), u'2009-09-04T01:56:35.450686+00:00')

        d3 = adsmutils.get_date(u'2009-09-03T20:56:35.450686')
        self.assertTrue(d3.tzname() == u'UTC')
        self.assertEqual(d3.isoformat(), u'2009-09-03T20:56:35.450686+00:00')
Example #9
    def test_authenticated_user(self):
        # bumblebee config
        r = authenticated_user.get('/vault/configuration')
        assert r.status_code == 200
        assert isinstance(r.json(), dict)
        assert 'link_servers' not in r.json()

        r = authenticated_user.get('/vault/configuration/link_servers')
        assert r.status_code == 200
        assert isinstance(r.json(), list)
        assert len(r.json()) > 100

        foo = get_date().isoformat()
        # server side user storage
        r = authenticated_user.post('/vault/user-data',
                                    json={'link_server': foo})
        assert r.status_code == 200
        assert r.json()['link_server'] == foo

        r = authenticated_user.get('/vault/user-data')
        assert r.status_code == 200
        assert isinstance(r.json(), dict)
        assert r.json()['link_server'] == foo

        # i'm using my own access token, once we switch to a dedicated account
        # made only for testing, the qid will change too
        r = authenticated_user.post('/vault/query', json={'q': '*:*'})
        assert r.status_code == 200
        assert isinstance(r.json(), dict)
        qid = r.json()['qid']  # d6980601bf770d5e4f39f6766336cf87
        assert qid == 'd6980601bf770d5e4f39f6766336cf87'
        # this numFound has fixed value from the time when qid was created, never changed
        numFound = r.json()['numFound']
        assert int(numFound) == 14039148

        r = authenticated_user.get('/vault/query/%s' % qid)
        assert r.status_code == 200
        assert 'numfound' in r.json()

        r = authenticated_user.get('/vault/execute_query/%s' % qid)
        assert r.status_code == 200
        assert r.json()['responseHeader']['params']['q'] == '*:*'
        assert r.json()['responseHeader']['params']['fl'] == 'id'
        assert r.json()['response']
        # this numFound value returns current number of documents, used to check if the DB is populated
        assert r.json()['response']['numFound'] > 15000000  # as of Feb 2021: 15207970

        r = authenticated_user.get('/vault/execute_query/%s?fl=recid' % qid)
        assert r.status_code == 200
        assert r.json()['responseHeader']['params']['q'] == '*:*'
        assert r.json()['responseHeader']['params']['fl'] == 'recid'
        assert r.json()['response']

        # 113dc6ef2e612ffe1a0de9a16e7f494e
        r = authenticated_user.get('/vault/query2svg/%s' % qid)
        assert 'svg' in r.text
        assert r.headers.get('Content-Type') == 'image/svg+xml'
Example #10
    def get(self):
        """
        If the current user is unauthenticated, or the current user
        is the "bootstrap" (anon) user, return/create a "BB Client" OAuthClient
        and token depending if "oauth_client" is encoded into their
        session cookie

        If the user is a authenticated as a real user, return/create
        a "BB Client" OAuthClient and token depending if that user already has
        one in the database
        """

        # rca: I'd like to register here my distaste for Flask-Restful and
        # how it divorces parameters; it was a big mistake to go with that framework
        # and the decision shouldn't have been left to inexperienced developers
        # this is not a recommended solution, but even the recommended solution
        # is just awful: http://stackoverflow.com/questions/30779584/flask-restful-passing-parameters-to-get-request
        parser = reqparse.RequestParser()
        parser.add_argument('redirect_uri', type=str)
        parser.add_argument('scope', type=str)
        parser.add_argument('client_name', type=str)
        parser.add_argument('ratelimit', type=float)
        parser.add_argument('create_new', type=inputs.boolean)
        parser.add_argument('expires', type=str)
        
        kwargs = parser.parse_args()

        client_name = kwargs.get('client_name', None)
        redirect_uri = kwargs.get('redirect_uri', None)
        ratelimit = kwargs.get('ratelimit', 1.0)
        create_new = kwargs.get('create_new', False)
        expires = kwargs.get('expires', None)
        
        if ratelimit is None:
            ratelimit = 1.0
        
        assert ratelimit >= 0.0
        
        if expires is not None:
            expires = get_date(expires) # throwing error on parse OK
        else:
            expires = datetime.datetime(2500, 1, 1)
            
        # If we visit this endpoint and are unauthenticated, then login as
        # our anonymous user
        if not current_user.is_authenticated():            
            login_user(user_manipulator.first(
                email=current_app.config['BOOTSTRAP_USER_EMAIL']
            ))
        
        if current_user.email == current_app.config['BOOTSTRAP_USER_EMAIL']:
            if kwargs.get('scope') or client_name or redirect_uri:
                abort(401, "Sorry, you can't change scopes/name/redirect_uri when creating temporary OAuth application")

        try:
            scopes = self._sanitize_scopes(kwargs.get('scope', None))
        except ValidationError as e:
            return {'error': e.value}, 400
Example #11
 def test_authenticated_user(self):
     # bumblebee config
     r = authenticated_user.get('/vault/configuration')
     assert r.status_code == 200
     assert isinstance(r.json(), dict)
     assert 'link_servers' not in r.json()
     
     r = authenticated_user.get('/vault/configuration/link_servers')
     assert r.status_code == 200
     assert isinstance(r.json(), list)
     assert len(r.json()) > 100
     
     
     foo = get_date().isoformat()
     # server side user storage
     r = authenticated_user.post('/vault/user-data', json={'link_server': foo})
     assert r.status_code == 200
     assert r.json()['link_server'] == foo
     
     r = authenticated_user.get('/vault/user-data')
     assert r.status_code == 200
     assert isinstance(r.json(), dict)
     assert r.json()['link_server'] == foo
     
     
     # i'm using my own access token, once we switch to a dedicated account
     # made only for testing, the qid will change too
     r = authenticated_user.post('/vault/query', json={'q': '*:*'})
     assert r.status_code == 200
     assert isinstance(r.json(), dict)
     qid = r.json()['qid'] # d6980601bf770d5e4f39f6766336cf87
     numFound = r.json()['numFound']
     assert qid == 'd6980601bf770d5e4f39f6766336cf87'
     
     r = authenticated_user.get('/vault/query/%s' % qid)
     assert r.status_code == 200
     assert 'numfound' in r.json()
     
     r = authenticated_user.get('/vault/execute_query/%s' % qid)
     assert r.status_code == 200
     assert r.json()['responseHeader']['params']['q'] == '*:*'
     assert r.json()['responseHeader']['params']['fl'] == 'id'
     assert r.json()['response']
     assert r.json()['response']['numFound'] > 15000000 # as of Feb 2021: 15207970
     # delta increased to 2 million, as numFound initial stored value in dev vault DB is fixed at 14.03 million
     self.assertAlmostEqual(r.json()['response']['numFound'], int(numFound), delta=2000000)
     
     r = authenticated_user.get('/vault/execute_query/%s?fl=recid' % qid)
     assert r.status_code == 200
     assert r.json()['responseHeader']['params']['q'] == '*:*'
     assert r.json()['responseHeader']['params']['fl'] == 'recid'
     assert r.json()['response']
     
     
     # 113dc6ef2e612ffe1a0de9a16e7f494e
     r = authenticated_user.get('/vault/query2svg/%s' % qid)
     assert 'svg' in r.text
     assert r.headers.get('Content-Type') == 'image/svg+xml'
Example #12
def get_access_token():
    '''Exchange 'code' for 'access_token' data'''
    payload = dict(request.args)
    if 'code' not in payload:
        raise Exception('Parameter code is missing')
    headers = {'Accept': 'application/json'}
    data = {
      'client_id': current_app.config['ORCID_CLIENT_ID'],
      'client_secret': current_app.config['ORCID_CLIENT_SECRET'],
      'code': payload['code'][0],
      'grant_type': 'authorization_code'
    }
    #print current_app.config['ORCID_OAUTH_ENDPOINT'], data, headers
    r = current_app.client.post(current_app.config['ORCID_OAUTH_ENDPOINT'], data=data, headers=headers)
    if r.status_code != 200:
        logging.error('For ORCID code {}, there was an error getting the token from the ORCID API.'.
                      format(payload['code'][0]))

    # update/create user account
    data = r.json()
    if 'orcid' in data:
        with current_app.session_scope() as session:
            u = session.query(User).filter_by(orcid_id=data['orcid']).options(load_only(User.orcid_id)).first()
            p = session.query(Profile).filter_by(orcid_id=data['orcid']).options(load_only(Profile.orcid_id)).first()
            if not u:
                u = User(orcid_id=data['orcid'], created=adsmutils.get_date())
            if not p:
                p = Profile(orcid_id=data['orcid'], created=adsmutils.get_date())
            u.updated = adsmutils.get_date()
            p.updated = adsmutils.get_date()
            u.access_token = data['access_token']
            # save the user
            session.begin_nested()
            try:
                session.add(u)
                session.add(p)
                session.commit()
            except exc.IntegrityError as e:
                session.rollback()
            # per PEP-0249 a transaction is always in progress
            session.commit()

    return r.text, r.status_code
Example #13
    def test_logging(self):
        logdir = os.path.abspath(
            os.path.join(os.path.dirname(__file__), u'../../logs'))
        foo_log = logdir + u'/foo.bar.log'
        if os.path.exists(foo_log):
            os.remove(foo_log)
        logger = adsmutils.setup_logging(u'foo.bar')
        logger.warning(u'first')
        logger.handlers[0].stream.flush()

        self.assertTrue(os.path.exists(foo_log))
        c = _read_file(foo_log)
        self.assertTrue('WARNING' in c)
        self.assertTrue('test_init.py' in c)
        self.assertTrue('first' in c)

        # now multiline message
        logger.warning(u'second\nthird')
        logger.warning(u'last')
        c = _read_file(foo_log)
        self.assertTrue(u'second\n     third' in c)

        msecs = False
        for x in c.strip().split(u'\n'):
            datestr = x.split(u' ')[0]
            if datestr != u'':
                t = adsmutils.get_date(datestr)
                if t.microsecond > 0:
                    msecs = True
        self.assertTrue(msecs)

        # test json formatter
        # replace the default formatter
        for handler in logger.handlers:
            handler.formatter = adsmutils.get_json_formatter()
        logger.info(u'test json formatter')
        c = _read_file(foo_log)
        self.assertTrue(u'"message": "test json formatter"' in c)
        self.assertTrue(u'"hostname":' in c)
        self.assertTrue(u'"lineno":' in c)

        # verify that there was only one log handler, logging to a file
        self.assertEqual(len(logger.handlers), 1)
        # now create a logger, requesting logs be written to stdout as well
        #   so there will be two log handlers
        logger2 = adsmutils.setup_logging(name_=u'foo.bar.2',
                                          attach_stdout=True)
        self.assertEqual(len(logger2.handlers), 2)
Example #14
def update_profile(orcid_id, data=None):
    """Inserts data into the user record and updates the 'updated'
    column with the most recent timestamp"""

    with current_app.session_scope() as session:
        u = session.query(User).filter_by(orcid_id=orcid_id).options(load_only(User.orcid_id)).first()
        if u:
            u.updated = adsmutils.get_date()
            if data:
                try:
                    #verify the data is a valid JSON
                    u.profile = json.dumps(json.loads(data))
                except Exception:
                    logging.error('Invalid data passed in for {} (ignoring it)'.format(orcid_id))
                    logging.error(data)
            # save the user
            session.begin_nested()
            try:
                session.add(u)
                session.commit()
            except exc.IntegrityError as e:
                session.rollback()
            # per PEP-0249 a transaction is always in progress
            session.commit()
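# Minimal usage sketch (not from the source): calling update_profile() after
# fetching a profile from the ORCID API. The ORCID iD and payload are
# illustrative placeholders; `data` must be a JSON string, since anything that
# fails json.loads() is logged and ignored by the function above.
def _example_update_profile_call():
    fetched = {'name': 'Example Author'}   # hypothetical API response
    update_profile('0000-0002-1825-0097',  # hypothetical ORCID iD
                   data=json.dumps(fetched))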
    def test_utcdatetime_type(self):
        base = declarative_base()

        class Test(base):
            __tablename__ = u'testdate'
            id = sa.Column(sa.Integer, primary_key=True)
            created = sa.Column(adsmutils.UTCDateTime, default=adsmutils.get_date)
            updated = sa.Column(adsmutils.UTCDateTime)
        base.metadata.bind = self.app.db.session.get_bind()
        base.metadata.create_all()
        
        with self.app.session_scope() as session:
            session.add(Test())
            m = session.query(Test).first()
            assert m.created
            assert m.created.tzname() == u'UTC'
            assert u'+00:00' in str(m.created)
            
            current = adsmutils.get_date(u'2018-09-07T20:22:02.249389+00:00')
            m.updated = current
            session.commit()
            
            m = session.query(Test).first()
            assert str(m.updated) == str(current)
Example #16
 def test_set_get_pages(self):
     msg = TurboBeeMsg()
     now = datetime.utcnow()
     
     msg.created = msg.get_timestamp(now)
     msg.updated = msg.get_timestamp(now)
     msg.expires = msg.get_timestamp(now)
     msg.eol = msg.get_timestamp(now)
     msg.set_value('hello world')
     msg.ctype = msg.ContentType.html
     msg.target = 'https:///some.com'
     msg.owner = 234
     
     r = self.app.set_pages([msg])
     assert 'created' in r
     assert len(r['created']) == 1
     
     pages = list(self.app.get_pages(r['created']))
     expected = {
         'id': 1,
         'target': u'https:///some.com', 
         'content_type': u'application/html', 
         'content': 'hello world', 
         'created': get_date(now).isoformat(), 
         'updated': get_date(now).isoformat(), 
         'expires': get_date(now).isoformat(), 
         'lifetime': get_date(now).isoformat(),
         'owner': 234,
         'qid': pages[0]['qid']
         }
     assert pages[0] == expected
     
     msg.qid = pages[0]['qid']
     r = self.app.set_pages([msg])
     assert 'updated' in r
     assert len(r['updated']) == 1
     assert r['updated'][0] == expected['qid']
     
     msg.status = Status.deleted
     r = self.app.set_pages([msg])
     assert 'deleted' in r
     assert r['deleted'][0] == expected['qid']
     
     r = self.app.set_pages([msg])
     assert r['ignored-deleted'][0] == expected['qid']
     assert len(list(self.app.get_pages(expected['qid']))) == 0
     
     # insert it again
     msg.status = Status.active
     r = self.app.set_pages([msg])
     assert r['created'][0]
     assert r['created'][0] != expected['qid']
     
     l = list(self.app.get_pages(r['created'], fields=['foo', 'qid', 'content', 'created']))
     assert l[0]['qid'] == r['created'][0]
     assert l[0]['created'] == expected['created']
     assert l[0]['content'] == 'hello world'
     assert 'updated' not in l[0]
     assert 'foo' not in l[0]
     
     
     # set multiple objects at once
     msg.qid = r['created'][0]
     msg2 = msg.loads(*msg.dump())
     msg2.qid = ''
     r = self.app.set_pages([msg, msg2])
     assert r['created'][0] 
     assert r['updated'][0] == msg.qid
     
     # update one by one
     msg2.qid = r['created'][0]
     r = self.app.set_pages([msg, msg2], one_by_one=True)
     assert msg.qid in r['updated']
     assert msg2.qid in r['updated']
     
     
     r = self.app.set_pages([msg, msg2, msg, msg, msg], one_by_one=True)
     assert set(r['updated']) == set([msg.qid, msg2.qid])
Example #17
    def set_pages(self, msgs, fail_fast=False, one_by_one=False):
        """Utility method to insert a bunch of messages
        into the db.

        @param msgs: list of TurboBeeMsg instances
        @return: dict with qids grouped under 'created', 'updated',
            'deleted' and 'ignored-deleted', plus an 'errors' list
        """

        with self.session_scope() as session:
            out = {}
            for msg in msgs:
                op = 'updated'
                page = None
                # object may already be there, we are updating it...
                if msg.qid:
                    page = session.query(Pages).filter_by(qid=msg.qid).first()

                if msg.status == Status.deleted and page is None:
                    if 'ignored-deleted' not in out:
                        out['ignored-deleted'] = []
                    out['ignored-deleted'].append(msg.qid)
                    continue

                if page is None:
                    op = 'created'
                    page = Pages(qid=uuid4().hex)
                    session.add(page)

                if msg.status == Status.deleted:
                    op = 'deleted'
                    session.delete(page)
                else:
                    now = get_date()
                    page.target = msg.target or page.target  # transfer the old defaults
                    page.content = msg.get_value()

                    # timestamps in msgs are datetime naive, make sure we apply timezone
                    page.created = msg.created.seconds and get_date(
                        msg.get_datetime(msg.created)) or now
                    page.content_type = self.guess_ctype(msg)
                    page.updated = msg.updated.seconds and get_date(
                        msg.get_datetime(msg.updated)) or now
                    # should we provide defaults if not set?
                    page.expires = msg.expires.seconds and get_date(
                        msg.get_datetime(msg.expires)) or None
                    page.lifetime = msg.eol.seconds and get_date(
                        msg.get_datetime(msg.eol)) or None
                    page.owner = msg.owner

                # keep the qid for later use (when session is expunged)
                qid = page.qid

                # insert one by one
                if one_by_one:
                    try:
                        session.commit()
                    except Exception as e:
                        self.logger.error(
                            'Error inserting %s into db. Message=%s', msg,
                            str(e))
                        session.rollback()
                        if fail_fast:
                            raise e
                        else:
                            if 'errors' not in out:
                                out['errors'] = []
                            out['errors'].append({
                                'qid': qid,
                                'msg': str(e)
                            })

                if op not in out:
                    out[op] = []
                out[op].append(qid)

            if not one_by_one:
                try:
                    session.commit()
                except IntegrityError as e:
                    self.logger.error(
                        'Error inserting data into db. Message=%s', str(e))
                    session.rollback()
                    if fail_fast:
                        raise e
                    else:
                        if 'errors' not in out:
                            out['errors'] = []
                        out['errors'].append({'qid': None, 'msg': str(e)})
        return out
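# Illustrative sketch (not from the source) of the dict set_pages() returns;
# the qid values are made-up uuid4().hex strings and the error message is a
# placeholder.
EXAMPLE_SET_PAGES_RESULT = {
    'created': ['0f1e2d3c4b5a69788796a5b4c3d2e1f0'],         # new pages, qid generated here
    'updated': ['a1b2c3d4e5f60718293a4b5c6d7e8f90'],         # existing pages matched by msg.qid
    'deleted': [],                                           # pages whose msg.status was deleted
    'ignored-deleted': [],                                   # delete requests for unknown qids
    'errors': [{'qid': None, 'msg': 'IntegrityError ...'}],  # present only if a commit failed
}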
Example #18
def _create_myads_query(template_type,
                        frequency,
                        data,
                        classes=None,
                        start_isodate=None):
    """
    Creates a query based on the stored myADS setup (for templated queries only)
    :param frequency: daily or weekly
    :param data: keywords or other stored query template data
    :param classes: arXiv classes, only required for arXiv template queries
    :return: out: list of dicts; constructed query, dates are such that it's meant to be run today:
                    [{q: query params,
                     sort: sort string}]
    """

    out = []
    beg_pubyear = (get_date() - datetime.timedelta(days=180)).year
    end_date = get_date().date()
    weekly_time_range = current_app.config.get('MYADS_WEEKLY_TIME_RANGE', 6)
    if start_isodate:
        start_isodate = parser.parse(start_isodate).date()
    if template_type in ('arxiv', None):
        if frequency == 'daily':
            # on Mondays, deal with the weekend properly
            if get_date().weekday() == 0:
                time_range = current_app.config.get('MYADS_DAILY_TIME_RANGE',
                                                    2)
                start_date = (get_date() -
                              datetime.timedelta(days=time_range)).date()
            else:
                start_date = get_date().date()
        elif frequency == 'weekly':
            start_date = (get_date() -
                          datetime.timedelta(days=weekly_time_range)).date()

        # if the provided last sent date is prior to normal start date, use the earlier date
        if start_isodate and (start_isodate < start_date):
            start_date = start_isodate

    if template_type == 'arxiv':
        if not classes:
            raise Exception(
                'Classes must be provided for an arXiv templated query')
        if not isinstance(classes, list):
            tmp = [classes]
        else:
            tmp = classes
        classes = 'arxiv_class:(' + ' OR '.join(
            [x + '.*' if '.' not in x else x for x in tmp]) + ')'
        keywords = data
        if frequency == 'daily':
            connector = [' ', ' NOT ']
            # keyword search should be sorted by score, "other recent" should be sorted by bibcode
            sort_w_keywords = ['score desc, bibcode desc', 'bibcode desc']
        elif frequency == 'weekly':
            connector = [' ']
            sort_w_keywords = ['score desc, bibcode desc']
        if not keywords:
            q = 'bibstem:arxiv {0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
                     format(classes, start_date, end_date, beg_pubyear)
            sort = 'bibcode desc'
            out.append({'q': q, 'sort': sort})
        else:
            for c, s in zip(connector, sort_w_keywords):
                q = 'bibstem:arxiv ({0}{1}({2})) entdate:["{3}Z00:00" TO "{4}Z23:59"] pubdate:[{5}-00 TO *]'.\
                    format(classes, c, keywords, start_date, end_date, beg_pubyear)
                sort = s
                out.append({'q': q, 'sort': sort})
    elif template_type == 'citations':
        keywords = data
        q = 'citations({0})'.format(keywords)
        sort = 'entry_date desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
    elif template_type == 'authors':
        keywords = data
        start_date = (get_date() -
                      datetime.timedelta(days=weekly_time_range)).date()
        if start_isodate and (start_isodate < start_date):
            start_date = start_isodate
        q = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
            format(keywords, start_date, end_date, beg_pubyear)
        sort = 'score desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
    elif template_type == 'keyword':
        keywords = data
        start_date = (get_date() -
                      datetime.timedelta(days=weekly_time_range)).date()
        if start_isodate and (start_isodate < start_date):
            start_date = start_isodate
        # most recent
        q = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
            format(keywords, start_date, end_date, beg_pubyear)
        sort = 'entry_date desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
        # most popular
        q = 'trending({0})'.format(keywords)
        sort = 'score desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
        # most cited
        q = 'useful({0})'.format(keywords)
        sort = 'score desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
    elif template_type is None and data:
        # General query - for consistency with the rest of templates,
        # remove lists such as:
        #   {u'fq': [u'{!type=aqp v=$fq_database}'],
        #    u'fq_database': [u'(database:astronomy)'],
        #    u'q': [u'star'],
        #    u'sort': [u'citation_count desc, bibcode desc']}
        # but only if there is only one element
        general = {
            k: v[0] if isinstance(v, (list, tuple)) and len(v) == 1 else v
            for k, v in list(data.items())
        }
        if 'q' in general:
            general['q'] = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
                format(general['q'], start_date, end_date, beg_pubyear)
        out.append(general)

    return out
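# Illustrative sketch (not from the source): roughly what _create_myads_query()
# builds for a weekly 'keyword' template run today; the keyword and dates are
# made-up placeholders.
EXAMPLE_KEYWORD_QUERIES = [
    # most recent
    {'q': 'exoplanets entdate:["2021-02-01Z00:00" TO "2021-02-07Z23:59"] pubdate:[2020-00 TO *]',
     'sort': 'entry_date desc, bibcode desc'},
    # most popular
    {'q': 'trending(exoplanets)', 'sort': 'score desc, bibcode desc'},
    # most cited
    {'q': 'useful(exoplanets)', 'sort': 'score desc, bibcode desc'},
]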
Example #19
    def test_template_query(self):
        '''Tests storage and retrieval of templated myADS queries'''
        httpretty.register_uri(
            httpretty.GET,
            self.app.config.get('VAULT_SOLR_QUERY_ENDPOINT'),
            content_type='application/json',
            status=200,
            body="""{
                    "responseHeader":{
                    "status":0, "QTime":0,
                    "params":{ "fl":"title,bibcode", "indent":"true", "wt":"json", "q":"*:*"}},
                    "response":{"numFound":10456930,"start":0,"docs":[
                      { "bibcode":"2005JGRC..110.4002G" },
                      { "bibcode":"2005JGRC..110.4003N" },
                      { "bibcode":"2005JGRC..110.4004Y" }]}}""")

        now = adsmutils.get_date().date()
        beg_pubyear = (now - datetime.timedelta(days=180)).year

        with self.app.session_scope() as session:
            r = session.query(User).filter_by(id=4).first()
            self.assertIsNone(r)

        # try to store a query with insufficient metadata
        r = self.client.post(url_for('user.myads_notifications'),
                             headers={
                                 'Authorization': 'secret',
                                 'X-Adsws-Uid': '4'
                             },
                             data=json.dumps({'data': 'keyword1 OR keyword2'}),
                             content_type='application/json')

        self.assertStatus(r, 400)

        # try to store a query with data keyword of the wrong type (also insufficient metadata)
        r = self.client.post(url_for('user.myads_notifications'),
                             headers={
                                 'Authorization': 'secret',
                                 'X-Adsws-Uid': '4'
                             },
                             data=json.dumps({'data': 123}),
                             content_type='application/json')

        self.assertStatus(r, 400)

        # try to store a query with the classes keyword of the wrong type
        r = self.client.post(url_for('user.myads_notifications'),
                             headers={
                                 'Authorization': 'secret',
                                 'X-Adsws-Uid': '4'
                             },
                             data=json.dumps({
                                 'type': 'template',
                                 'template': 'arxiv',
                                 'classes': 'astro-ph',
                                 'data': 'keyword1 OR keyword2'
                             }),
                             content_type='application/json')

        self.assertStatus(r, 400)

        # store a query correctly
        r = self.client.post(url_for('user.myads_notifications'),
                             headers={
                                 'Authorization': 'secret',
                                 'X-Adsws-Uid': '4'
                             },
                             data=json.dumps({
                                 'type': 'template',
                                 'template': 'keyword',
                                 'data': 'keyword1 OR keyword2'
                             }),
                             content_type='application/json')

        self.assertStatus(r, 200)
        query_id = r.json['id']

        # test that the pipeline export works as expected
        r = self.client.get(url_for('user.get_myads', user_id='4'),
                            headers={'Authorization': 'secret'})

        start_date = (adsmutils.get_date() -
                      datetime.timedelta(days=25)).date()

        self.assertStatus(r, 200)
        self.assertEquals(r.json[0]['id'], query_id)
        self.assertEquals(r.json[0]['name'], 'keyword1, etc.')
        self.assertTrue(r.json[0]['active'])
        self.assertFalse(r.json[0]['stateful'])
        self.assertEquals(r.json[0]['frequency'], 'weekly')
        self.assertEquals(r.json[0]['type'], 'template')
        self.assertEquals(r.json[0]['template'], 'keyword')
        self.assertEquals(r.json[0]['data'], 'keyword1 OR keyword2')

        # try to retrieve a query without a user ID in the headers
        r = self.client.get(url_for('user.myads_notifications',
                                    myads_id=query_id),
                            headers={'Authorization': 'secret'})

        self.assertStatus(r, 400)

        # successfully retrieve a query setup
        r = self.client.get(url_for('user.myads_notifications',
                                    myads_id=query_id),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': '4'
                            })

        self.assertStatus(r, 200)
        self.assertEquals(r.json[0]['id'], query_id)
        self.assertEquals(r.json[0]['name'], 'keyword1, etc.')
        self.assertTrue(r.json[0]['active'])
        self.assertFalse(r.json[0]['stateful'])
        self.assertEquals(r.json[0]['frequency'], 'weekly')
        self.assertEquals(r.json[0]['type'], 'template')

        # successfully delete the query setup
        r = self.client.delete(url_for('user.myads_notifications',
                                       myads_id=query_id),
                               headers={
                                   'Authorization': 'secret',
                                   'X-Adsws-Uid': '4'
                               })

        self.assertStatus(r, 204)

        # ensure the query is really deleted
        with self.app.session_scope() as session:
            q = session.query(MyADS).filter_by(id=query_id).first()
            self.assertIsNone(q)

        # ensure the get returns the right status for a missing query
        r = self.client.get(url_for('user.myads_notifications',
                                    myads_id=query_id),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': '4'
                            })

        self.assertStatus(r, 404)

        # save an arxiv template query successfully
        r = self.client.post(url_for('user.myads_notifications'),
                             headers={
                                 'Authorization': 'secret',
                                 'X-Adsws-Uid': '4'
                             },
                             data=json.dumps({
                                 'type': 'template',
                                 'template': 'arxiv',
                                 'data': 'keyword1 OR keyword2',
                                 'classes': ['astro-ph']
                             }),
                             content_type='application/json')

        self.assertStatus(r, 200)
        query_id = r.json['id']

        # check the stored query via the pipeline export
        r = self.client.get(url_for('user.get_myads', user_id='4'),
                            headers={'Authorization': 'secret'})

        if adsmutils.get_date().weekday() == 0:
            start_date = (adsmutils.get_date() -
                          datetime.timedelta(days=2)).date()
        else:
            start_date = adsmutils.get_date().date()

        self.assertStatus(r, 200)
        self.assertEquals(r.json[0]['id'], query_id)
        self.assertEquals(r.json[0]['name'], 'keyword1, etc. - Recent Papers')
        self.assertFalse(r.json[0]['stateful'])
        self.assertEquals(r.json[0]['type'], 'template')
        self.assertTrue(r.json[0]['active'])
        self.assertEquals(r.json[0]['frequency'], 'daily')
        self.assertEquals(r.json[0]['template'], 'arxiv')
        self.assertEquals(r.json[0]['data'], 'keyword1 OR keyword2')
        self.assertEquals(r.json[0]['classes'], [u'astro-ph'])

        # edit the stored query
        r = self.client.put(url_for('user.myads_notifications',
                                    myads_id=query_id),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': '4'
                            },
                            data=json.dumps({
                                'type': 'template',
                                'template': 'arxiv',
                                'data': 'keyword1 OR keyword2 OR keyword3',
                                'classes': ['astro-ph']
                            }),
                            content_type='application/json')

        self.assertStatus(r, 200)

        # check editing the query name
        r = self.client.put(url_for('user.myads_notifications',
                                    myads_id=query_id),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': '4'
                            },
                            data=json.dumps({
                                'type': 'template',
                                'template': 'arxiv',
                                'name': 'keyword1, etc. - Recent Papers',
                                'data': 'keyword2 OR keyword3',
                                'classes': ['astro-ph']
                            }),
                            content_type='application/json')

        self.assertStatus(r, 200)
        # name was provided, but it was constructed, so the name should be updated
        self.assertEquals(r.json['name'], 'keyword2, etc. - Recent Papers')

        r = self.client.put(url_for('user.myads_notifications',
                                    myads_id=query_id),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': '4'
                            },
                            data=json.dumps({
                                'type': 'template',
                                'template': 'arxiv',
                                'name': 'test query',
                                'data': 'keyword2 OR keyword3',
                                'classes': ['astro-ph']
                            }),
                            content_type='application/json')

        self.assertStatus(r, 200)
        # a non-constructed name was provided - use that
        self.assertEquals(r.json['name'], 'test query')

        r = self.client.put(url_for('user.myads_notifications',
                                    myads_id=query_id),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': '4'
                            },
                            data=json.dumps({
                                'type': 'template',
                                'template': 'arxiv',
                                'data': 'keyword1 OR keyword2 OR keyword3',
                                'classes': ['astro-ph']
                            }),
                            content_type='application/json')

        self.assertStatus(r, 200)
        # no name is provided, so keep the old provided name
        self.assertEquals(r.json['name'], 'test query')

        # check the exported setup
        r = self.client.get(url_for('user.get_myads', user_id='4'),
                            headers={'Authorization': 'secret'})

        self.assertStatus(r, 200)
        self.assertEquals(r.json[0]['id'], query_id)
        self.assertEquals(r.json[0]['name'], 'test query')
        self.assertFalse(r.json[0]['stateful'])
        self.assertEquals(r.json[0]['type'], 'template')
        self.assertTrue(r.json[0]['active'])
        self.assertEquals(r.json[0]['frequency'], 'daily')
        self.assertEquals(r.json[0]['template'], 'arxiv')
        self.assertEquals(r.json[0]['data'],
                          'keyword1 OR keyword2 OR keyword3')
        self.assertEquals(r.json[0]['classes'], ['astro-ph'])

        # add a second query
        r = self.client.post(url_for('user.myads_notifications'),
                             headers={
                                 'Authorization': 'secret',
                                 'X-Adsws-Uid': '4'
                             },
                             data=json.dumps({
                                 'type': 'template',
                                 'template': 'authors',
                                 'data': 'author:"Kurtz, M."'
                             }),
                             content_type='application/json')

        self.assertStatus(r, 200)
        self.assertEquals(r.json['name'], 'Favorite Authors - Recent Papers')

        # get all queries back
        r = self.client.get(url_for('user.myads_notifications'),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': '4'
                            })

        self.assertStatus(r, 200)
        self.assertEquals(r.json[0]['name'], 'test query')
        self.assertEquals(r.json[1]['name'],
                          'Favorite Authors - Recent Papers')

        # save an arXiv query without keywords
        r = self.client.post(url_for('user.myads_notifications'),
                             headers={
                                 'Authorization': 'secret',
                                 'X-Adsws-Uid': '4'
                             },
                             data=json.dumps({
                                 'type': 'template',
                                 'template': 'arxiv',
                                 'classes': ['cs']
                             }),
                             content_type='application/json')

        self.assertStatus(r, 200)
        self.assertEquals(r.json['data'], None)

        r = self.client.post(url_for('user.myads_notifications'),
                             headers={
                                 'Authorization': 'secret',
                                 'X-Adsws-Uid': '4'
                             },
                             data=json.dumps({
                                 'type': 'template',
                                 'data': '',
                                 'template': 'arxiv',
                                 'classes': ['hep-ex']
                             }),
                             content_type='application/json')

        self.assertStatus(r, 200)
        self.assertEquals(r.json['data'], None)

        # test a blank arXiv query
        r = self.client.post(url_for('user.myads_notifications'),
                             headers={
                                 'Authorization': 'secret',
                                 'X-Adsws-Uid': '4'
                             },
                             data=json.dumps({
                                 'type': 'template',
                                 'template': 'arxiv',
                                 'classes': ['astro-ph']
                             }),
                             content_type='application/json')

        self.assertStatus(r, 200)
        query_id = r.json['id']

        # make sure it's editable
        r = self.client.put(url_for('user.myads_notifications',
                                    myads_id=query_id),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': '4'
                            },
                            data=json.dumps({
                                'type': 'template',
                                'template': 'arxiv',
                                'active': False
                            }),
                            content_type='application/json')

        self.assertStatus(r, 200)

        r = self.client.put(url_for('user.myads_notifications',
                                    myads_id=query_id),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': '4'
                            },
                            data=json.dumps({
                                'type': 'template',
                                'template': 'arxiv',
                                'data': 'keyword1',
                                'classes': ['astro-ph']
                            }),
                            content_type='application/json')

        self.assertStatus(r, 200)

        # test the citation query construction
        r = self.client.post(url_for('user.myads_notifications'),
                             headers={
                                 'Authorization': 'secret',
                                 'X-Adsws-Uid': '4'
                             },
                             data=json.dumps({
                                 'type': 'template',
                                 'template': 'citations',
                                 'data': 'author:"Kurtz, Michael"'
                             }),
                             content_type='application/json')

        self.assertStatus(r, 200)
        query_id = r.json['id']

        r = self.client.get(url_for('user.myads_notifications',
                                    myads_id=query_id),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': '4'
                            })

        self.assertStatus(r, 200)
        self.assertEquals(r.json[0]['id'], query_id)
        self.assertEquals(r.json[0]['name'],
                          'author:"Kurtz, Michael" - Citations')
        self.assertTrue(r.json[0]['active'])
        self.assertTrue(r.json[0]['stateful'])
        self.assertEquals(r.json[0]['frequency'], 'weekly')
        self.assertEquals(r.json[0]['type'], 'template')

        # test the author query construction
        r = self.client.post(url_for('user.myads_notifications'),
                             headers={
                                 'Authorization': 'secret',
                                 'X-Adsws-Uid': '4'
                             },
                             data=json.dumps({
                                 'type': 'template',
                                 'template': 'authors',
                                 'data': 'author:"Kurtz, Michael"'
                             }),
                             content_type='application/json')

        self.assertStatus(r, 200)
        query_id = r.json['id']

        r = self.client.get(url_for('user.myads_notifications',
                                    myads_id=query_id),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': '4'
                            })

        start_date = (adsmutils.get_date() -
                      datetime.timedelta(days=25)).date()

        self.assertStatus(r, 200)
        self.assertEquals(r.json[0]['id'], query_id)
        self.assertEquals(r.json[0]['name'],
                          'Favorite Authors - Recent Papers')
        self.assertTrue(r.json[0]['active'])
        self.assertTrue(r.json[0]['stateful'])
        self.assertEquals(r.json[0]['frequency'], 'weekly')
        self.assertEquals(r.json[0]['type'], 'template')
Example #20
    def post(self, library):
        """
        HTTP POST request that conducts operations at the library level.

        :param library: primary library ID
        :return: response if operation was successful

        Header:
        -------
        Must contain the API forwarded user ID of the user accessing the end
        point

        Post body:
        ----------
        KEYWORD, VALUE

        libraries: <list>   List of secondary libraries to include in the action (optional, based on action)
        action: <unicode>   union, intersection, difference, copy, empty
                            Actions to perform on given libraries:
                                Union: requires one or more secondary libraries to be passed; takes the union of the
                                    primary and secondary library sets; a new library is created
                                Intersection: requires one or more secondary libraries to be passed; takes the
                                    intersection of the primary and secondary library sets; a new library is created
                                Difference: requires one or more secondary libraries to be passed; takes the difference
                                    between the primary and secondary libraries; the primary library comes first in the
                                    operation, so the secondary library is removed from the primary; a new library
                                    is created
                                Copy: requires one and only one secondary library to be passed; the primary library
                                    will be copied into the secondary library (so the secondary library will be
                                    overwritten); no new library is created
                                Empty: secondary libraries are ignored; the primary library will be emptied of its
                                    contents, though the library and metadata will remain; no new library is created
        name: <string>      (optional) name of the new library (must be unique for that user); used only for actions in
                                [union, intersection, difference]
        description: <string> (optional) description of the new library; used only for actions in
                                [union, intersection, difference]
        public: <boolean>   (optional) is the new library public to view; used only for actions in
                                [union, intersection, difference]

        -----------
        Return data:
        -----------
        name:           <string>    Name of the library
        id:             <string>    ID of the library
        description:    <string>    Description of the library

        Permissions:
        -----------
        The following user types can conduct library operations:
          - owner
          - admin
          - write
        """

        # Get the user requesting this from the header
        try:
            user_editing = self.helper_get_user_id()
        except KeyError:
            return err(MISSING_USERNAME_ERROR)

        # URL safe base64 string to UUID
        try:
            library_uuid = self.helper_slug_to_uuid(library)
        except TypeError:
            return err(BAD_LIBRARY_ID_ERROR)

        user_editing_uid = \
            self.helper_absolute_uid_to_service_uid(absolute_uid=user_editing)

        # Check the permissions of the user
        if not self.write_access(service_uid=user_editing_uid,
                                 library_id=library_uuid):
            return err(NO_PERMISSION_ERROR)

        try:
            data = get_post_data(request,
                                 types=dict(libraries=list,
                                            action=str,
                                            name=str,
                                            description=str,
                                            public=bool))
        except TypeError as error:
            current_app.logger.error(
                'Wrong type passed for POST: {0} [{1}]'.format(
                    request.data, error))
            return err(WRONG_TYPE_ERROR)

        if data['action'] in ['union', 'intersection', 'difference']:
            if 'libraries' not in data:
                return err(NO_LIBRARY_SPECIFIED_ERROR)
            if 'name' not in data:
                data['name'] = 'Untitled {0}.'.format(get_date().isoformat())
            if 'public' not in data:
                data['public'] = False

        if data['action'] == 'copy':
            if 'libraries' not in data:
                return err(NO_LIBRARY_SPECIFIED_ERROR)
            if len(data['libraries']) > 1:
                return err(TOO_MANY_LIBRARIES_SPECIFIED_ERROR)

        lib_names = []
        with current_app.session_scope() as session:
            primary = session.query(Library).filter_by(id=library_uuid).one()
            lib_names.append(primary.name)
            if 'libraries' in data:
                for lib in data['libraries']:
                    try:
                        secondary_uuid = self.helper_slug_to_uuid(lib)
                    except TypeError:
                        return err(BAD_LIBRARY_ID_ERROR)
                    secondary = session.query(Library).filter_by(
                        id=secondary_uuid).one()
                    lib_names.append(secondary.name)

        if data['action'] == 'union':
            bib_union = self.setops_libraries(library_id=library_uuid,
                                              document_data=data,
                                              operation='union')

            current_app.logger.info(
                'Successfully took the union of the libraries {0} (IDs: {1}, {2})'
                .format(', '.join(lib_names), library,
                        ', '.join(data['libraries'])))

            data['bibcode'] = bib_union
            if 'description' not in data:
                description = 'Union of libraries {0} (IDs: {1}, {2})' \
                    .format(', '.join(lib_names), library, ', '.join(data['libraries']))
                # field length capped in model
                if len(description) > 200:
                    description = 'Union of library {0} (ID: {1}) with {2} other libraries'\
                        .format(lib_names[0], library, len(lib_names[1:]))

                data['description'] = description

            try:
                library_dict = self.create_library(
                    service_uid=user_editing_uid, library_data=data)
            except BackendIntegrityError as error:
                current_app.logger.error(error)
                return err(DUPLICATE_LIBRARY_NAME_ERROR)
            except TypeError as error:
                current_app.logger.error(error)
                return err(WRONG_TYPE_ERROR)

            return library_dict, 200

        elif data['action'] == 'intersection':
            bib_intersect = self.setops_libraries(library_id=library_uuid,
                                                  document_data=data,
                                                  operation='intersection')
            current_app.logger.info(
                'Successfully took the intersection of the libraries {0} (IDs: {1}, {2})'
                .format(', '.join(lib_names), library,
                        ', '.join(data['libraries'])))

            data['bibcode'] = bib_intersect
            if 'description' not in data:
                description = 'Intersection of {0} (IDs: {1}, {2})' \
                    .format(', '.join(lib_names), library, ', '.join(data['libraries']))
                if len(description) > 200:
                    description = 'Intersection of {0} (ID: {1}) with {2} other libraries'\
                        .format(lib_names[0], library, len(lib_names[1:]))

                data['description'] = description

            try:
                library_dict = self.create_library(
                    service_uid=user_editing_uid, library_data=data)
            except BackendIntegrityError as error:
                current_app.logger.error(error)
                return err(DUPLICATE_LIBRARY_NAME_ERROR)
            except TypeError as error:
                current_app.logger.error(error)
                return err(WRONG_TYPE_ERROR)
            return library_dict, 200

        elif data['action'] == 'difference':
            bib_diff = self.setops_libraries(library_id=library_uuid,
                                             document_data=data,
                                             operation='difference')
            current_app.logger.info(
                'Successfully took the difference of {0} (ID {2}) - (minus) {1} (ID {3})'
                .format(lib_names[0], ', '.join(lib_names[1:]), library,
                        ', '.join(data['libraries'])))

            data['bibcode'] = bib_diff
            if 'description' not in data:
                data['description'] = 'Records that are in {0} (ID {2}) but not in {1} (ID {3})' \
                    .format(lib_names[0], ', '.join(lib_names[1:]), library, ', '.join(data['libraries']))

            try:
                library_dict = self.create_library(
                    service_uid=user_editing_uid, library_data=data)
            except BackendIntegrityError as error:
                current_app.logger.error(error)
                return err(DUPLICATE_LIBRARY_NAME_ERROR)
            except TypeError as error:
                current_app.logger.error(error)
                return err(WRONG_TYPE_ERROR)
            return library_dict, 200

        elif data['action'] == 'copy':
            library_dict = self.copy_library(library_id=library_uuid,
                                             document_data=data)
            current_app.logger.info(
                'Successfully copied {0} (ID {2}) into {1} (ID {3})'.format(
                    lib_names[0], lib_names[1], library, data['libraries'][0]))

            with current_app.session_scope() as session:
                libid = self.helper_slug_to_uuid(data['libraries'][0])
                library = session.query(Library).filter_by(id=libid).one()
                bib = library.get_bibcodes()

                library_dict['bibcode'] = bib

            return library_dict, 200

        elif data['action'] == 'empty':
            library_dict = self.empty_library(library_id=library_uuid)
            current_app.logger.info(
                'Successfully emptied {0} (ID {1}) of all records'.format(
                    lib_names[0], library))

            with current_app.session_scope() as session:
                library = session.query(Library).filter_by(
                    id=library_uuid).one()
                bib = library.get_bibcodes()

                library_dict['bibcode'] = bib

            return library_dict, 200

        else:
            current_app.logger.info('User requested a non-standard operation')
            return {}, 400
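# A minimal client-side usage sketch for the view above (assumptions: the view
# is registered with a Flask test client under a hypothetical endpoint name
# 'operations', and the forwarded user ID travels in an 'X-Adsws-Uid'-style
# header as in the other tests in this collection; slugs are placeholders):
#
#     r = self.client.post(
#         url_for('operations', library='<primary-slug>'),
#         headers={'X-Adsws-Uid': '4'},
#         data=json.dumps({
#             'action': 'union',
#             'libraries': ['<secondary-slug>'],
#             'name': 'Merged library'
#         }),
#         content_type='application/json')
#     # on success: 200 plus the new library's name/id/description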
Example No. 21
    def post(self, library):
        """
        HTTP POST request that conducts operations at the library level.

        :param library: primary library ID
        :return: response if operation was successful

        Header:
        -------
        Must contain the API-forwarded user ID of the user accessing the
        endpoint

        Post body:
        ----------
        KEYWORD, VALUE

        libraries: <list>   List of secondary libraries to include in the action (optional, based on action)
        action: <unicode>   union, intersection, difference, copy, empty
                            Actions to perform on given libraries:
                                Union: requires one or more secondary libraries to be passed; takes the union of the
                                    primary and secondary library sets; a new library is created
                                Intersection: requires one or more secondary libraries to be passed; takes the
                                    intersection of the primary and secondary library sets; a new library is created
                                Difference: requires one or more secondary libraries to be passed; takes the difference
                                    between the primary and secondary libraries; the primary library comes first in the
                                    operation, so the secondary library is removed from the primary; a new library
                                    is created
                                Copy: requires one and only one secondary library to be passed; the primary library
                                    will be copied into the secondary library (so the secondary library will be
                                    overwritten); no new library is created
                                Empty: secondary libraries are ignored; the primary library will be emptied of its
                                    contents, though the library and metadata will remain; no new library is created
        name: <string>      (optional) name of the new library (must be unique for that user); used only for actions in
                                [union, intersection, difference]
        description: <string> (optional) description of the new library; used only for actions in
                                [union, intersection, difference]
        public: <boolean>   (optional) is the new library public to view; used only for actions in
                                [union, intersection, difference]

        -----------
        Return data:
        -----------
        name:           <string>    Name of the library
        id:             <string>    ID of the library
        description:    <string>    Description of the library

        Permissions:
        -----------
        The following user types can conduct library operations:
          - owner
          - admin
          - write
        """

        # Get the user requesting this from the header
        try:
            user_editing = self.helper_get_user_id()
        except KeyError:
            return err(MISSING_USERNAME_ERROR)

        # URL safe base64 string to UUID
        library_uuid = self.helper_slug_to_uuid(library)

        user_editing_uid = \
            self.helper_absolute_uid_to_service_uid(absolute_uid=user_editing)

        # Check the permissions of the user
        if not self.write_access(service_uid=user_editing_uid,
                                 library_id=library_uuid):
            return err(NO_PERMISSION_ERROR)

        try:
            data = get_post_data(
                request,
                types=dict(libraries=list, action=basestring, name=basestring, description=basestring, public=bool)
            )
        except TypeError as error:
            current_app.logger.error('Wrong type passed for POST: {0} [{1}]'
                                     .format(request.data, error))
            return err(WRONG_TYPE_ERROR)

        if data['action'] in ['union', 'intersection', 'difference']:
            if 'libraries' not in data:
                return err(NO_LIBRARY_SPECIFIED_ERROR)
            if 'name' not in data:
                data['name'] = 'Untitled {0}.'.format(get_date().isoformat())
            if 'public' not in data:
                data['public'] = False

        if data['action'] == 'copy':
            if 'libraries' not in data:
                return err(NO_LIBRARY_SPECIFIED_ERROR)
            if len(data['libraries']) > 1:
                return err(TOO_MANY_LIBRARIES_SPECIFIED_ERROR)

        lib_names = []
        with current_app.session_scope() as session:
            primary = session.query(Library).filter_by(id=library_uuid).one()
            lib_names.append(primary.name)
            if 'libraries' in data:
                for lib in data['libraries']:
                    secondary_uuid = self.helper_slug_to_uuid(lib)
                    secondary = session.query(Library).filter_by(id=secondary_uuid).one()
                    lib_names.append(secondary.name)

        if data['action'] == 'union':
            bib_union = self.setops_libraries(
                library_id=library_uuid,
                document_data=data,
                operation='union'
            )

            current_app.logger.info('Successfully took the union of the libraries {0} (IDs: {1}, {2})'
                    .format(', '.join(lib_names), library, ', '.join(data['libraries'])))

            data['bibcode'] = bib_union
            if 'description' not in data:
                data['description'] = 'Union of libraries {0} (IDs: {1}, {2})' \
                    .format(', '.join(lib_names), library, ', '.join(data['libraries']))

            try:
                library_dict = self.create_library(service_uid=user_editing_uid, library_data=data)
            except BackendIntegrityError as error:
                current_app.logger.error(error)
                return err(DUPLICATE_LIBRARY_NAME_ERROR)
            except TypeError as error:
                current_app.logger.error(error)
                return err(WRONG_TYPE_ERROR)

            return library_dict, 200

        elif data['action'] == 'intersection':
            bib_intersect = self.setops_libraries(
                library_id=library_uuid,
                document_data=data,
                operation='intersection'
            )
            current_app.logger.info('Successfully took the intersection of the libraries {0} (IDs: {1}, {2})'
                    .format(', '.join(lib_names), library, ', '.join(data['libraries'])))

            data['bibcode'] = bib_intersect
            if 'description' not in data:
                data['description'] = 'Intersection of {0} (IDs: {1}, {2})' \
                    .format(', '.join(lib_names), library, ', '.join(data['libraries']))

            try:
                library_dict = self.create_library(service_uid=user_editing_uid, library_data=data)
            except BackendIntegrityError as error:
                current_app.logger.error(error)
                return err(DUPLICATE_LIBRARY_NAME_ERROR)
            except TypeError as error:
                current_app.logger.error(error)
                return err(WRONG_TYPE_ERROR)
            return library_dict, 200

        elif data['action'] == 'difference':
            bib_diff = self.setops_libraries(
                library_id=library_uuid,
                document_data=data,
                operation='difference'
            )
            current_app.logger.info('Successfully took the difference of {0} (ID {2}) - (minus) {1} (ID {3})'
                    .format(lib_names[0], ', '.join(lib_names[1:]), library, ', '.join(data['libraries'])))

            data['bibcode'] = bib_diff
            if 'description' not in data:
                data['description'] = 'Records that are in {0} (ID {2}) but not in {1} (ID {3})' \
                    .format(lib_names[0], ', '.join(lib_names[1:]), library, ', '.join(data['libraries']))

            try:
                library_dict = self.create_library(service_uid=user_editing_uid, library_data=data)
            except BackendIntegrityError as error:
                current_app.logger.error(error)
                return err(DUPLICATE_LIBRARY_NAME_ERROR)
            except TypeError as error:
                current_app.logger.error(error)
                return err(WRONG_TYPE_ERROR)
            return library_dict, 200

        elif data['action'] == 'copy':
            library_dict = self.copy_library(
                library_id=library_uuid,
                document_data=data
            )
            current_app.logger.info('Successfully copied {0} (ID {2}) into {1} (ID {3})'
                                    .format(lib_names[0], lib_names[1], library, data['libraries'][0]))

            with current_app.session_scope() as session:
                libid = self.helper_slug_to_uuid(data['libraries'][0])
                library = session.query(Library).filter_by(id=libid).one()
                bib = library.get_bibcodes()

                library_dict['bibcode'] = bib

            return library_dict, 200

        elif data['action'] == 'empty':
            library_dict = self.empty_library(
                library_id=library_uuid
            )
            current_app.logger.info('Successfully emptied {0} (ID {1}) of all records'
                                    .format(lib_names[0], library))

            with current_app.session_scope() as session:
                library = session.query(Library).filter_by(id=library_uuid).one()
                bib = library.get_bibcodes()

                library_dict['bibcode'] = bib

            return library_dict, 200

        else:
            current_app.logger.info('User requested a non-standard operation')
            return {}, 400
Example No. 22
    def test_myads_execute_notification(self):

        httpretty.register_uri(
            httpretty.GET,
            self.app.config.get('VAULT_SOLR_QUERY_ENDPOINT'),
            content_type='application/json',
            status=200,
            body="""{
                            "responseHeader":{
                            "status":0, "QTime":0,
                            "params":{ "fl":"title,bibcode", "indent":"true", "wt":"json", "q":"*:*"}},
                            "response":{"numFound":10456930,"start":0,"docs":[
                              { "bibcode":"2005JGRC..110.4002G" },
                              { "bibcode":"2005JGRC..110.4003N" },
                              { "bibcode":"2005JGRC..110.4004Y" }]}}""")

        now = adsmutils.get_date().date()
        beg_pubyear = (now - datetime.timedelta(days=180)).year

        # can't use as anonymous user
        user_id = self.app.config.get('BOOTSTRAP_USER_ID')
        r = self.client.get(url_for('user.execute_myads_query', myads_id=123),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': user_id
                            })

        self.assertStatus(r, 400)

        user_id = 6

        r = self.client.post(url_for('user.myads_notifications'),
                             headers={
                                 'Authorization': 'secret',
                                 'X-Adsws-Uid': user_id
                             },
                             data=json.dumps({
                                 'type': 'template',
                                 'template': 'authors',
                                 'data': 'author:"Kurtz, Michael"'
                             }),
                             content_type='application/json')

        self.assertStatus(r, 200)
        query_id = r.json['id']

        r = self.client.get(url_for('user.execute_myads_query',
                                    myads_id=query_id),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': user_id
                            })

        start_date = (adsmutils.get_date() -
                      datetime.timedelta(days=25)).date()

        self.assertStatus(r, 200)
        self.assertEqual(r.json, [{
            'q':
            'author:"Kurtz, Michael" entdate:["{0}Z00:00" TO "{1}Z23:59"] '
            'pubdate:[{2}-00 TO *]'.format(start_date, now, beg_pubyear),
            'sort':
            'score desc, bibcode desc'
        }])
Example No. 23
    def test_myads_retrieval(self):
        '''Tests pipeline retrieval of myADS setup and users'''

        now = adsmutils.get_date()

        with self.app.session_scope() as session:
            q = session.query(Query).first()

            qid = q.qid

        # make sure no setups exist
        r = self.client.get(url_for('user.myads_notifications'),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': '3'
                            })

        self.assertStatus(r, 204)

        # try saving a query with bad data
        r = self.client.post(url_for('user.myads_notifications'),
                             headers={
                                 'Authorization': 'secret',
                                 'X-Adsws-Uid': '3'
                             },
                             data=json.dumps({
                                 'name': 'Query 1',
                                 'qid': qid,
                                 'stateful': True,
                                 'frequency': 'bad data',
                                 'type': 'query'
                             }),
                             content_type='application/json')

        self.assertStatus(r, 400)

        # save the query correctly
        r = self.client.post(url_for('user.myads_notifications'),
                             headers={
                                 'Authorization': 'secret',
                                 'X-Adsws-Uid': '3'
                             },
                             data=json.dumps({
                                 'name': 'Query 1',
                                 'qid': qid,
                                 'stateful': True,
                                 'frequency': 'daily',
                                 'type': 'query'
                             }),
                             content_type='application/json')

        self.assertStatus(r, 200)
        self.assertEqual(r.json['name'], 'Query 1')
        self.assertTrue(r.json['active'])
        myads_id = r.json['id']

        # edit the query with bad data
        r = self.client.put(url_for('user.myads_notifications',
                                    myads_id=myads_id),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': '3'
                            },
                            data=json.dumps({
                                'name': 'Query 1 - edited',
                                'stateful': 'bad data'
                            }),
                            content_type='application/json')

        self.assertStatus(r, 400)

        # edit the query correctly
        r = self.client.put(url_for('user.myads_notifications',
                                    myads_id=myads_id),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': '3'
                            },
                            data=json.dumps({'name': 'Query 1 - edited'}),
                            content_type='application/json')

        self.assertStatus(r, 200)
        self.assertEqual(r.json['name'], 'Query 1 - edited')

        # get all myADS setups via the pipeline endpoint
        r = self.client.get(url_for('user.get_myads', user_id='3'),
                            headers={'Authorization': 'secret'})

        self.assertStatus(r, 200)
        self.assertEqual(r.json[0]['name'], 'Query 1 - edited')
        self.assertEqual(r.json[0]['qid'], qid)
        self.assertTrue(r.json[0]['active'])
        self.assertTrue(r.json[0]['stateful'])
        self.assertEqual(r.json[0]['frequency'], 'daily')
        self.assertEqual(r.json[0]['type'], 'query')

        # get all myADS setups via the BBB endpoint
        r = self.client.get(url_for('user.myads_notifications'),
                            headers={
                                'Authorization': 'secret',
                                'X-Adsws-Uid': '3'
                            })

        self.assertStatus(r, 200)
        self.assertEqual(r.json[0]['name'], 'Query 1 - edited')
        self.assertTrue(r.json[0]['active'])
        self.assertEqual(r.json[0]['frequency'], 'daily')
        self.assertEqual(r.json[0]['type'], 'query')

        # fetch the active myADS users
        r = self.client.get(url_for('user.export', iso_datestring=now))

        self.assertStatus(r, 200)
        self.assertEqual(r.json, {'users': [3]})
Example No. 24
def _create_myads_query(template_type, frequency, data, classes=None):
    """
    Creates a query based on the stored myADS setup (for templated queries only)
    :param template_type: template name; one of 'arxiv', 'citations', 'authors', 'keyword'
    :param frequency: daily or weekly
    :param data: keywords or other stored query template data
    :param classes: arXiv classes, only required for arXiv template queries
    :return: out: list of dicts; constructed query, dates are such that it's meant to be run today:
                    [{q: query params,
                     sort: sort string}]
    """

    out = []
    beg_pubyear = (get_date() - datetime.timedelta(days=180)).year
    end_date = get_date().date()

    if template_type == 'arxiv':
        if not classes:
            raise Exception(
                'Classes must be provided for an arXiv templated query')
        if not isinstance(classes, list):
            tmp = [classes]
        else:
            tmp = classes
        classes = 'arxiv_class:(' + ' OR '.join(
            [x + '.*' if '.' not in x else x for x in tmp]) + ')'
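        # For example (illustrative values): classes=['astro-ph', 'cs.AI']
        # becomes 'arxiv_class:(astro-ph.* OR cs.AI)' -- bare archives get a
        # '.*' wildcard, fully qualified classes are kept as-is.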
        keywords = data
        if frequency == 'daily':
            connector = [' ', ' NOT ']
            # keyword search should be sorted by score, "other recent" should be sorted by bibcode
            sort_w_keywords = ['score desc, bibcode desc', 'bibcode desc']
            # on Mondays, deal with the weekend properly
            if get_date().weekday() == 0:
                start_date = (get_date() - datetime.timedelta(days=2)).date()
            else:
                start_date = get_date().date()
        elif frequency == 'weekly':
            connector = [' ']
            sort_w_keywords = ['score desc, bibcode desc']
            start_date = (get_date() - datetime.timedelta(days=25)).date()
        if not keywords:
            q = 'bibstem:arxiv {0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
                     format(classes, start_date, end_date, beg_pubyear)
            sort = 'bibcode desc'
            out.append({'q': q, 'sort': sort})
        else:
            for c, s in zip(connector, sort_w_keywords):
                q = 'bibstem:arxiv ({0}{1}({2})) entdate:["{3}Z00:00" TO "{4}Z23:59"] pubdate:[{5}-00 TO *]'.\
                    format(classes, c, keywords, start_date, end_date, beg_pubyear)
                sort = s
                out.append({'q': q, 'sort': sort})
    elif template_type == 'citations':
        keywords = data
        q = 'citations({0})'.format(keywords)
        sort = 'entry_date desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
    elif template_type == 'authors':
        keywords = data
        start_date = (get_date() - datetime.timedelta(days=25)).date()
        q = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
            format(keywords, start_date, end_date, beg_pubyear)
        sort = 'score desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
    elif template_type == 'keyword':
        keywords = data
        start_date = (get_date() - datetime.timedelta(days=25)).date()
        # most recent
        q = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
            format(keywords, start_date, end_date, beg_pubyear)
        sort = 'entry_date desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
        # most popular
        q = 'trending({0})'.format(keywords)
        sort = 'score desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
        # most cited
        q = 'useful({0})'.format(keywords)
        sort = 'score desc, bibcode desc'
        out.append({'q': q, 'sort': sort})

    return out
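
# A minimal usage sketch (illustrative; it assumes the module-level imports
# used by _create_myads_query above, and the dates/year shown are placeholders
# that depend on the day the query is built):
#
#     _create_myads_query('authors', 'weekly', 'author:"Kurtz, Michael"')
#     # -> [{'q': 'author:"Kurtz, Michael" '
#     #           'entdate:["<start>Z00:00" TO "<today>Z23:59"] '
#     #           'pubdate:[<year>-00 TO *]',
#     #      'sort': 'score desc, bibcode desc'}]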