def test_search_get_range_rows(self):
    page = Pages(qid='wxyz', content='hi')
    page2 = Pages(qid='wxyz2', content='hi 2')
    self.app.db.session.add(page)
    self.app.db.session.add(page2)
    self.app.db.session.commit()

    begin = get_date(dt.datetime.utcnow()) - dt.timedelta(hours=1)
    end = get_date(dt.datetime.utcnow()) + dt.timedelta(hours=1)

    # url_for translates to:
    # '/search?begin=2018-12-26T18%3A27%3A02.367394%2B00%3A00&rows=1&end=2018-12-26T20%3A27%3A02.367412%2B00%3A00'
    r = self.client.get(url_for('turbobee_app.search',
                                begin=begin.isoformat(),
                                end=end.isoformat(),
                                rows=1))

    first_page = r.json[0]
    created = dateutil.parser.parse(first_page['created'])
    self.assertLess(begin, created)
    self.assertGreater(end, created)
    self.assertEqual(r.status_code, 200)
def get_access_token():
    '''Exchange 'code' for 'access_token' data'''
    payload = request.args.to_dict()
    if 'code' not in payload:
        raise Exception('Parameter code is missing')

    headers = {'Accept': 'application/json'}
    data = {
        'client_id': current_app.config['ORCID_CLIENT_ID'],
        'client_secret': current_app.config['ORCID_CLIENT_SECRET'],
        'code': payload['code'],
        'grant_type': 'authorization_code'
    }

    # do not use a connection pool, always establish a new connection to the
    # orcid remote server; we were having issues with dropped connections
    # mid-stream, and this request is not idempotent, therefore we can't retry
    try:
        r = requests.post(current_app.config['ORCID_OAUTH_ENDPOINT'],
                          data=data,
                          headers=headers,
                          timeout=current_app.config.get('CONNECTION_TIMEOUT', 30))
    except (ConnectionError, ConnectTimeout, ReadTimeout) as e:
        logging.error('For ORCID code {}, there was a connection error with the ORCID API'
                      .format(payload['code']))
        return 'There was a connection error with the ORCID API', 502

    if r.status_code != 200:
        logging.error('For ORCID code {}, there was an error getting the token from the ORCID API.'
                      .format(payload['code']))
        return r.text, r.status_code

    # update/create the user account
    data = r.json()
    if 'orcid' in data:
        with current_app.session_scope() as session:
            u = session.query(User).filter_by(orcid_id=data['orcid']).options(load_only(User.orcid_id)).first()
            p = session.query(Profile).filter_by(orcid_id=data['orcid']).options(load_only(Profile.orcid_id)).first()
            if not u:
                u = User(orcid_id=data['orcid'], created=adsmutils.get_date())
            if not p:
                p = Profile(orcid_id=data['orcid'], created=adsmutils.get_date())

            u.updated = adsmutils.get_date()
            p.updated = adsmutils.get_date()
            u.access_token = data['access_token']

            # save the user
            session.begin_nested()
            try:
                session.add(u)
                session.add(p)
                session.commit()
            except exc.IntegrityError as e:
                session.rollback()
            # per PEP-0249 a transaction is always in progress
            session.commit()

    return r.text, r.status_code
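# A minimal client-side sketch of exchanging the redirect 'code' against this
# endpoint. The host and route path '/orcid/exchangeOAuthCode' are assumptions
# for illustration; substitute whatever URL get_access_token() is bound to.
import requests

resp = requests.get('https://api.example.org/orcid/exchangeOAuthCode',
                    params={'code': 'code-from-orcid-redirect'})
if resp.status_code == 200:
    token_data = resp.json()  # the ORCID token response, passed through
    access_token = token_data['access_token']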
def update_profile_local(orcid_id, data=None, force=False):
    """Update local db with ORCID profile"""
    data = json.loads(data)
    with current_app.session_scope() as session:
        profile = session.query(Profile).filter_by(orcid_id=orcid_id).first()
        if not profile:
            logging.error('ORCID profile {} does not exist; creating'.format(orcid_id))
            profile = Profile(orcid_id=orcid_id, created=adsmutils.get_date())
            force = True

        # data is assumed to come from the ORCID API /works endpoint
        if data:
            # convert milliseconds since epoch to seconds since epoch
            last_modified = data['activities-summary']['last-modified-date']['value']
            last_modified /= 1000.
            if force or (profile.updated < datetime.utcfromtimestamp(last_modified).replace(tzinfo=pytz.utc)):
                works = data['activities-summary']['works']['group']
                new_recs = {}
                update_recs = {}
                orcid_recs = []
                try:
                    current_recs = profile.bibcode.keys()
                except Exception:
                    current_recs = []
                for work in works:
                    try:
                        id0, rec = find_record(work)
                    except Exception:
                        continue
                    if id0 not in current_recs:
                        new_recs.update(rec)
                    else:
                        # if the bibcode is already in the profile, keep its status
                        rec[id0]['status'] = profile.bibcode[id0]['status']
                        update_recs.update(rec)
                    orcid_recs.append(id0)

                profile.add_records(new_recs)
                profile.add_records(update_recs)

                # remove records from the profile that aren't in the ORCID set
                remove_recs = list(set(current_recs) - set(orcid_recs))
                profile.remove_bibcodes(remove_recs)

                profile.updated = adsmutils.get_date()

                # save the profile
                session.begin_nested()
                try:
                    session.add(profile)
                    session.commit()
                except exc.IntegrityError as e:
                    session.rollback()
                    logging.warning('ORCID profile database error - updated bibcodes for {} were not saved.'
                                    .format(orcid_id))
def search():
    keys = request.args.keys()
    # default is 50, max is 100
    rows = min(current_app.config.get('MAX_RETURNED', 100),
               int(request.args.get('rows') or 50))

    with current_app.session_scope() as session:
        if 'begin' in keys and 'end' in keys:
            begin = get_date(request.args['begin'])
            end = get_date(request.args['end'])
            query = session.query(Pages).filter(Pages.created.between(begin, end))
        elif 'begin' in keys:
            # search for all records after begin
            begin = get_date(request.args['begin'])
            query = session.query(Pages).filter(Pages.created >= begin)
        elif 'end' in keys:
            # search for all records before end
            end = get_date(request.args['end'])
            query = session.query(Pages).filter(Pages.created <= end)
        elif 'at' in keys:
            # search for all records created at a specific timestamp
            at = get_date(request.args['at'])
            query = session.query(Pages).filter(Pages.created == at)
        elif 'null' in keys:
            query = session.query(Pages).filter(Pages.created == None)
        else:
            return jsonify({'msg': 'Invalid parameters %s' % keys}), 505

        if 'last_id' in keys:
            query = query.filter(Pages.id > request.args['last_id'])

        query = query.order_by(Pages.updated.asc()) \
                     .limit(rows)

        if 'fields' in keys:
            # load only some fields
            allowed_fields = [
                'qid', 'created', 'updated', 'expires',
                'lifetime', 'content_type', 'content'
            ]
            # fields may be passed as repeated query parameters
            fields = request.args.getlist('fields') or allowed_fields
            fields_to_load = list(set(fields) & set(allowed_fields))
            query = query.options(load_only(*fields_to_load))

        try:
            pages = query.all()
            # it is possible that toJSON() will eagerly load all fields
            # (defeating load_only() above)
            result = [page.toJSON() for page in pages]
            return jsonify(result)
        except Exception as e:
            current_app.logger.error('Failed request: %s (error=%s)', keys, e)
            return jsonify({'msg': str(e)}), 500
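# A minimal sketch of calling this endpoint, assuming it is mounted at /search
# (as test_search_get_range_rows above suggests). The date filters accept
# ISO-8601 strings; 'fields' restricts which columns are loaded from the db.
import requests

resp = requests.get('https://api.example.org/search',
                    params={'begin': '2018-12-26T18:27:02+00:00',
                            'rows': 10,
                            'fields': ['qid', 'created']})
pages = resp.json()  # a list of page dicts, at most 'rows' entries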
def get_date(self, date=None):
    """
    :return: UTC date
    """
    self.logger.info('Example of logging within the app.')
    return get_date(date).isoformat()
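# Usage sketch (assumed behavior, consistent with test_get_date() below): the
# underlying adsmutils.get_date() always returns a timezone-aware UTC datetime,
# so this wrapper yields ISO strings ending in '+00:00'.
#
#   app.get_date()                              # e.g. '2019-01-01T12:00:00+00:00'
#   app.get_date('2009-09-03T20:56:35-05:00')   # '2009-09-04T01:56:35+00:00'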
def test_bootstrap(self):
    expires = datetime.datetime.fromordinal(
        adsmutils.get_date().toordinal() + 5)
    params = {
        'expires': expires.isoformat(),
        'ratelimit': 0.001,
        'create_new': False
    }
    r = authenticated_user.get('/accounts/bootstrap', params=params)
    a = r.json()

    r = anonymous_user.get('/accounts/bootstrap', params=params)
    b = r.json()

    # currently fails, it returns 'anonymous' for the
    # authenticated user if the user in db has empty 'is_active' column
    # also, the ratelimits must allow for more clients (i.e. be not fully
    # consumed)
    assert a['username'] != b['username']
    assert a['access_token'] != b['access_token']
    assert a['username'] == 'tester@ads'
    assert b['username'] == 'anonymous@ads'

    # repeating the bootstrap request should give you the
    # same access token
    for x in range(5):
        r = anonymous_user.get(
            '/accounts/bootstrap',
            params=params,
            headers={'Authorization': 'Bearer %s' % b['access_token']})
        assert r.json()['access_token'] == b['access_token']

    for x in range(5):
        r = authenticated_user.get('/accounts/bootstrap', params=params)
        assert r.json()['access_token'] == a['access_token']
def test_get_date(self):
    """Check we always work with UTC dates"""
    d = adsmutils.get_date()
    self.assertTrue(d.tzname() == u'UTC')

    d1 = adsmutils.get_date(u'2009-09-04T01:56:35.450686Z')
    self.assertTrue(d1.tzname() == u'UTC')
    self.assertEqual(d1.isoformat(), u'2009-09-04T01:56:35.450686+00:00')

    d2 = adsmutils.get_date(u'2009-09-03T20:56:35.450686-05:00')
    self.assertTrue(d2.tzname() == u'UTC')
    self.assertEqual(d2.isoformat(), u'2009-09-04T01:56:35.450686+00:00')

    d3 = adsmutils.get_date(u'2009-09-03T20:56:35.450686')
    self.assertTrue(d3.tzname() == u'UTC')
    self.assertEqual(d3.isoformat(), u'2009-09-03T20:56:35.450686+00:00')
def test_authenticated_user(self):
    # bumblebee config
    r = authenticated_user.get('/vault/configuration')
    assert r.status_code == 200
    assert isinstance(r.json(), dict)
    assert 'link_servers' not in r.json()

    r = authenticated_user.get('/vault/configuration/link_servers')
    assert r.status_code == 200
    assert isinstance(r.json(), list)
    assert len(r.json()) > 100

    foo = get_date().isoformat()

    # server side user storage
    r = authenticated_user.post('/vault/user-data', json={'link_server': foo})
    assert r.status_code == 200
    assert r.json()['link_server'] == foo

    r = authenticated_user.get('/vault/user-data')
    assert r.status_code == 200
    assert isinstance(r.json(), dict)
    assert r.json()['link_server'] == foo

    # i'm using my own access token, once we switch to a dedicated account
    # made only for testing, the qid will change too
    r = authenticated_user.post('/vault/query', json={'q': '*:*'})
    assert r.status_code == 200
    assert isinstance(r.json(), dict)
    qid = r.json()['qid']  # d6980601bf770d5e4f39f6766336cf87
    assert qid == 'd6980601bf770d5e4f39f6766336cf87'

    # this numFound has a fixed value from the time when the qid was created, never changed
    numFound = r.json()['numFound']
    assert int(numFound) == 14039148

    r = authenticated_user.get('/vault/query/%s' % qid)
    assert r.status_code == 200
    assert 'numfound' in r.json()

    r = authenticated_user.get('/vault/execute_query/%s' % qid)
    assert r.status_code == 200
    assert r.json()['responseHeader']['params']['q'] == '*:*'
    assert r.json()['responseHeader']['params']['fl'] == 'id'
    assert r.json()['response']
    # this numFound value returns the current number of documents, used to check if the DB is populated
    assert r.json()['response']['numFound'] > 15000000  # as of Feb 2021: 15207970

    r = authenticated_user.get('/vault/execute_query/%s?fl=recid' % qid)
    assert r.status_code == 200
    assert r.json()['responseHeader']['params']['q'] == '*:*'
    assert r.json()['responseHeader']['params']['fl'] == 'recid'
    assert r.json()['response']

    # 113dc6ef2e612ffe1a0de9a16e7f494e
    r = authenticated_user.get('/vault/query2svg/%s' % qid)
    assert 'svg' in r.text
    assert r.headers.get('Content-Type') == 'image/svg+xml'
def get(self):
    """
    If the current user is unauthenticated, or the current user is the
    "bootstrap" (anon) user, return/create a "BB Client" OAuthClient and
    token depending on whether "oauth_client" is encoded into their
    session cookie.

    If the user is authenticated as a real user, return/create a
    "BB Client" OAuthClient and token depending on whether that user
    already has one in the database.
    """
    # rca: I'd like to register here my distaste for Flask-Restful and
    # how it divorces parameters; it was a big mistake to go with that framework
    # and the decision shouldn't have been left to inexperienced developers
    # this is not a recommended solution, but even the recommended solution
    # is just awful: http://stackoverflow.com/questions/30779584/flask-restful-passing-parameters-to-get-request
    parser = reqparse.RequestParser()
    parser.add_argument('redirect_uri', type=str)
    parser.add_argument('scope', type=str)
    parser.add_argument('client_name', type=str)
    parser.add_argument('ratelimit', type=float)
    parser.add_argument('create_new', type=inputs.boolean)
    parser.add_argument('expires', type=str)
    kwargs = parser.parse_args()

    client_name = kwargs.get('client_name', None)
    redirect_uri = kwargs.get('redirect_uri', None)
    ratelimit = kwargs.get('ratelimit', 1.0)
    create_new = kwargs.get('create_new', False)
    expires = kwargs.get('expires', None)

    if ratelimit is None:
        ratelimit = 1.0

    assert ratelimit >= 0.0

    if expires is not None:
        expires = get_date(expires)  # throwing an error on parse is OK
    else:
        expires = datetime.datetime(2500, 1, 1)

    # If we visit this endpoint and are unauthenticated, then log in as
    # our anonymous user
    if not current_user.is_authenticated():
        login_user(user_manipulator.first(
            email=current_app.config['BOOTSTRAP_USER_EMAIL']
        ))

    if current_user.email == current_app.config['BOOTSTRAP_USER_EMAIL']:
        if 'scopes' in kwargs or client_name or redirect_uri:
            abort(401, "Sorry, you can't change scopes/name/redirect_uri "
                       "when creating a temporary OAuth application")
        try:
            scopes = self._sanitize_scopes(kwargs.get('scope', None))
        except ValidationError as e:
            return {'error': e.value}, 400
def test_authenticated_user(self):
    # bumblebee config
    r = authenticated_user.get('/vault/configuration')
    assert r.status_code == 200
    assert isinstance(r.json(), dict)
    assert 'link_servers' not in r.json()

    r = authenticated_user.get('/vault/configuration/link_servers')
    assert r.status_code == 200
    assert isinstance(r.json(), list)
    assert len(r.json()) > 100

    foo = get_date().isoformat()

    # server side user storage
    r = authenticated_user.post('/vault/user-data', json={'link_server': foo})
    assert r.status_code == 200
    assert r.json()['link_server'] == foo

    r = authenticated_user.get('/vault/user-data')
    assert r.status_code == 200
    assert isinstance(r.json(), dict)
    assert r.json()['link_server'] == foo

    # i'm using my own access token, once we switch to a dedicated account
    # made only for testing, the qid will change too
    r = authenticated_user.post('/vault/query', json={'q': '*:*'})
    assert r.status_code == 200
    assert isinstance(r.json(), dict)
    qid = r.json()['qid']  # d6980601bf770d5e4f39f6766336cf87
    numFound = r.json()['numFound']
    assert qid == 'd6980601bf770d5e4f39f6766336cf87'

    r = authenticated_user.get('/vault/query/%s' % qid)
    assert r.status_code == 200
    assert 'numfound' in r.json()

    r = authenticated_user.get('/vault/execute_query/%s' % qid)
    assert r.status_code == 200
    assert r.json()['responseHeader']['params']['q'] == '*:*'
    assert r.json()['responseHeader']['params']['fl'] == 'id'
    assert r.json()['response']
    assert r.json()['response']['numFound'] > 15000000  # as of Feb 2021: 15207970
    # delta increased to 2 million, as numFound initial stored value in dev vault DB is fixed at 14.03 million
    self.assertAlmostEqual(r.json()['response']['numFound'],
                           int(numFound), delta=2000000)

    r = authenticated_user.get('/vault/execute_query/%s?fl=recid' % qid)
    assert r.status_code == 200
    assert r.json()['responseHeader']['params']['q'] == '*:*'
    assert r.json()['responseHeader']['params']['fl'] == 'recid'
    assert r.json()['response']

    # 113dc6ef2e612ffe1a0de9a16e7f494e
    r = authenticated_user.get('/vault/query2svg/%s' % qid)
    assert 'svg' in r.text
    assert r.headers.get('Content-Type') == 'image/svg+xml'
def get_access_token():
    '''Exchange 'code' for 'access_token' data'''
    payload = dict(request.args)
    if 'code' not in payload:
        raise Exception('Parameter code is missing')

    headers = {'Accept': 'application/json'}
    data = {
        'client_id': current_app.config['ORCID_CLIENT_ID'],
        'client_secret': current_app.config['ORCID_CLIENT_SECRET'],
        'code': payload['code'][0],
        'grant_type': 'authorization_code'
    }

    r = current_app.client.post(current_app.config['ORCID_OAUTH_ENDPOINT'],
                                data=data, headers=headers)
    if r.status_code != 200:
        logging.error('For ORCID code {}, there was an error getting the token from the ORCID API.'
                      .format(payload['code'][0]))

    # update/create the user account
    data = r.json()
    if 'orcid' in data:
        with current_app.session_scope() as session:
            u = session.query(User).filter_by(orcid_id=data['orcid']).options(load_only(User.orcid_id)).first()
            p = session.query(Profile).filter_by(orcid_id=data['orcid']).options(load_only(Profile.orcid_id)).first()
            if not u:
                u = User(orcid_id=data['orcid'], created=adsmutils.get_date())
            if not p:
                p = Profile(orcid_id=data['orcid'], created=adsmutils.get_date())

            u.updated = adsmutils.get_date()
            p.updated = adsmutils.get_date()
            u.access_token = data['access_token']

            # save the user
            session.begin_nested()
            try:
                session.add(u)
                session.add(p)
                session.commit()
            except exc.IntegrityError as e:
                session.rollback()
            # per PEP-0249 a transaction is always in progress
            session.commit()

    return r.text, r.status_code
def test_logging(self):
    logdir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), u'../../logs'))
    foo_log = logdir + u'/foo.bar.log'
    if os.path.exists(foo_log):
        os.remove(foo_log)
    logger = adsmutils.setup_logging(u'foo.bar')
    logger.warning(u'first')
    logger.handlers[0].stream.flush()

    self.assertTrue(os.path.exists(foo_log))
    c = _read_file(foo_log)
    self.assertTrue('WARNING' in c)
    self.assertTrue('test_init.py' in c)
    self.assertTrue('first' in c)

    # now a multiline message
    logger.warning(u'second\nthird')
    logger.warning(u'last')
    c = _read_file(foo_log)
    self.assertTrue(u'second\n third' in c)

    msecs = False
    for x in c.strip().split(u'\n'):
        datestr = x.split(u' ')[0]
        if datestr != u'':
            t = adsmutils.get_date(datestr)
            if t.microsecond > 0:
                msecs = True
    self.assertTrue(msecs)

    # test the json formatter
    # replace the default formatter
    for handler in logger.handlers:
        handler.formatter = adsmutils.get_json_formatter()
    logger.info(u'test json formatter')
    c = _read_file(foo_log)
    self.assertTrue(u'"message": "test json formatter"' in c)
    self.assertTrue(u'"hostname":' in c)
    self.assertTrue(u'"lineno":' in c)

    # verify that there was only one log handler, logging to a file
    self.assertEqual(len(logger.handlers), 1)

    # now create a logger, requesting logs be written to stdout as well,
    # so there will be two log handlers
    logger2 = adsmutils.setup_logging(name_=u'foo.bar.2', attach_stdout=True)
    self.assertEqual(len(logger2.handlers), 2)
def update_profile(orcid_id, data=None):
    """Inserts data into the user record and updates the 'updated'
    column with the most recent timestamp"""
    with current_app.session_scope() as session:
        u = session.query(User).filter_by(orcid_id=orcid_id).options(load_only(User.orcid_id)).first()
        if u:
            u.updated = adsmutils.get_date()
            if data:
                try:
                    # verify the data is valid JSON
                    u.profile = json.dumps(json.loads(data))
                except Exception:
                    logging.error('Invalid data passed in for {} (ignoring it)'.format(orcid_id))
                    logging.error(data)

            # save the user
            session.begin_nested()
            try:
                session.add(u)
                session.commit()
            except exc.IntegrityError as e:
                session.rollback()
            # per PEP-0249 a transaction is always in progress
            session.commit()
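# Usage sketch: store a fresh profile JSON for a known ORCID iD and bump the
# 'updated' timestamp; invalid JSON is logged and ignored. The iD below is the
# ORCID documentation example, used here purely as a placeholder.
update_profile('0000-0002-1825-0097', data='{"name": "Josiah Carberry"}')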
def test_utcdatetime_type(self):
    base = declarative_base()

    class Test(base):
        __tablename__ = u'testdate'
        id = sa.Column(sa.Integer, primary_key=True)
        created = sa.Column(adsmutils.UTCDateTime, default=adsmutils.get_date)
        updated = sa.Column(adsmutils.UTCDateTime)

    base.metadata.bind = self.app.db.session.get_bind()
    base.metadata.create_all()

    with self.app.session_scope() as session:
        session.add(Test())
        m = session.query(Test).first()
        assert m.created
        assert m.created.tzname() == u'UTC'
        assert u'+00:00' in str(m.created)

        current = adsmutils.get_date(u'2018-09-07T20:22:02.249389+00:00')
        m.updated = current
        session.commit()

        m = session.query(Test).first()
        assert str(m.updated) == str(current)
def test_set_get_pages(self):
    msg = TurboBeeMsg()
    now = datetime.utcnow()
    msg.created = msg.get_timestamp(now)
    msg.updated = msg.get_timestamp(now)
    msg.expires = msg.get_timestamp(now)
    msg.eol = msg.get_timestamp(now)
    msg.set_value('hello world')
    msg.ctype = msg.ContentType.html
    msg.target = 'https:///some.com'
    msg.owner = 234

    r = self.app.set_pages([msg])
    assert 'created' in r
    assert len(r['created']) == 1

    pages = list(self.app.get_pages(r['created']))
    expected = {
        'id': 1,
        'target': u'https:///some.com',
        'content_type': u'application/html',
        'content': 'hello world',
        'created': get_date(now).isoformat(),
        'updated': get_date(now).isoformat(),
        'expires': get_date(now).isoformat(),
        'lifetime': get_date(now).isoformat(),
        'owner': 234,
        'qid': pages[0]['qid']
    }
    assert pages[0] == expected

    msg.qid = pages[0]['qid']
    r = self.app.set_pages([msg])
    assert 'updated' in r
    assert len(r['updated']) == 1
    assert r['updated'][0] == expected['qid']

    msg.status = Status.deleted
    r = self.app.set_pages([msg])
    assert 'deleted' in r
    assert r['deleted'][0] == expected['qid']

    r = self.app.set_pages([msg])
    assert r['ignored-deleted'][0] == expected['qid']
    assert len(list(self.app.get_pages(expected['qid']))) == 0

    # insert it again
    msg.status = Status.active
    r = self.app.set_pages([msg])
    assert r['created'][0]
    assert r['created'][0] != expected['qid']

    l = list(self.app.get_pages(r['created'],
                                fields=['foo', 'qid', 'content', 'created']))
    assert l[0]['qid'] == r['created'][0]
    assert l[0]['created'] == expected['created']
    assert l[0]['content'] == 'hello world'
    assert 'updated' not in l[0]
    assert 'foo' not in l[0]

    # set multiple objects at once
    msg.qid = r['created'][0]
    msg2 = msg.loads(*msg.dump())
    msg2.qid = ''
    r = self.app.set_pages([msg, msg2])
    assert r['created'][0]
    assert r['updated'][0] == msg.qid

    # update one by one
    msg2.qid = r['created'][0]
    r = self.app.set_pages([msg, msg2], one_by_one=True)
    assert msg.qid in r['updated']
    assert msg2.qid in r['updated']

    r = self.app.set_pages([msg, msg2, msg, msg, msg], one_by_one=True)
    assert set(r['updated']) == set([msg.qid, msg2.qid])
def set_pages(self, msgs, fail_fast=False, one_by_one=False):
    """Utility method to insert a bunch of messages into the db

    @param msgs: list of TurboBeeMsg instances
    @return: dict with qids of 'created', 'updated', 'errors'
    """
    with self.session_scope() as session:
        out = {}
        for msg in msgs:
            op = 'updated'
            page = None

            # the object may already be there, we are updating it...
            if msg.qid:
                page = session.query(Pages).filter_by(qid=msg.qid).first()

            if msg.status == Status.deleted and page is None:
                if 'ignored-deleted' not in out:
                    out['ignored-deleted'] = []
                out['ignored-deleted'].append(msg.qid)
                continue

            if page is None:
                op = 'created'
                page = Pages(qid=uuid4().hex)
                session.add(page)

            if msg.status == Status.deleted:
                op = 'deleted'
                session.delete(page)
            else:
                now = get_date()
                page.target = msg.target or page.target  # transfer the old defaults
                page.content = msg.get_value()
                # timestamps in msgs are datetime naive, make sure we apply the timezone
                page.created = msg.created.seconds and get_date(
                    msg.get_datetime(msg.created)) or now
                page.content_type = self.guess_ctype(msg)
                page.updated = msg.updated.seconds and get_date(
                    msg.get_datetime(msg.updated)) or now
                # should we provide defaults if not set?
                page.expires = msg.expires.seconds and get_date(
                    msg.get_datetime(msg.expires)) or None
                page.lifetime = msg.eol.seconds and get_date(
                    msg.get_datetime(msg.eol)) or None
                page.owner = msg.owner

            # keep the qid for later use (when the session is expunged)
            qid = page.qid

            # insert one by one
            if one_by_one:
                try:
                    session.commit()
                except Exception as e:
                    self.logger.error('Error inserting %s into db. Message=%s',
                                      msg, e.message)
                    session.rollback()
                    if fail_fast:
                        raise e
                    else:
                        if 'errors' not in out:
                            out['errors'] = []
                        out['errors'].append({'qid': qid, 'msg': e.message})

            if op not in out:
                out[op] = []
            out[op].append(qid)

        if not one_by_one:
            try:
                session.commit()
            except IntegrityError as e:
                self.logger.error('Error inserting data into db. Message=%s',
                                  e.message)
                session.rollback()
                if fail_fast:
                    raise e
                else:
                    if 'errors' not in out:
                        out['errors'] = []
                    out['errors'].append({'qid': None, 'msg': e.message})

        return out
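# A minimal usage sketch for set_pages(), mirroring test_set_get_pages above.
# The return value buckets qids by what happened to them.
msg = TurboBeeMsg()
msg.set_value('hello world')
msg.target = 'https://example.org/page'  # placeholder target
out = app.set_pages([msg])
# out ~= {'created': ['<32-char hex qid>']}; a second call with msg.qid set to
# that value reports the qid under 'updated' instead (or 'deleted' when
# msg.status == Status.deleted).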
def _create_myads_query(template_type, frequency, data, classes=None, start_isodate=None):
    """
    Creates a query based on the stored myADS setup (for templated queries only)

    :param template_type: 'arxiv', 'citations', 'authors', 'keyword', or None for a general query
    :param frequency: daily or weekly
    :param data: keywords or other stored query template data
    :param classes: arXiv classes, only required for arXiv template queries
    :param start_isodate: ISO date the notification was last successfully sent, if any

    :return: out: list of dicts; constructed query, dates are such that it's meant to be run today:
             [{q: query params, sort: sort string}]
    """
    out = []
    beg_pubyear = (get_date() - datetime.timedelta(days=180)).year
    end_date = get_date().date()
    weekly_time_range = current_app.config.get('MYADS_WEEKLY_TIME_RANGE', 6)

    if start_isodate:
        start_isodate = parser.parse(start_isodate).date()

    if template_type in ('arxiv', None):
        if frequency == 'daily':
            # on Mondays, deal with the weekend properly
            if get_date().weekday() == 0:
                time_range = current_app.config.get('MYADS_DAILY_TIME_RANGE', 2)
                start_date = (get_date() - datetime.timedelta(days=time_range)).date()
            else:
                start_date = get_date().date()
        elif frequency == 'weekly':
            start_date = (get_date() - datetime.timedelta(days=weekly_time_range)).date()
        # if the provided last-sent date is prior to the normal start date, use the earlier date
        if start_isodate and (start_isodate < start_date):
            start_date = start_isodate

    if template_type == 'arxiv':
        if not classes:
            raise Exception('Classes must be provided for an arXiv templated query')
        if type(classes) != list:
            tmp = [classes]
        else:
            tmp = classes
        classes = 'arxiv_class:(' + ' OR '.join(
            [x + '.*' if '.' not in x else x for x in tmp]) + ')'
        keywords = data
        if frequency == 'daily':
            connector = [' ', ' NOT ']
            # keyword search should be sorted by score, "other recent" should be sorted by bibcode
            sort_w_keywords = ['score desc, bibcode desc', 'bibcode desc']
        elif frequency == 'weekly':
            connector = [' ']
            sort_w_keywords = ['score desc, bibcode desc']
        if not keywords:
            q = 'bibstem:arxiv {0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
                format(classes, start_date, end_date, beg_pubyear)
            sort = 'bibcode desc'
            out.append({'q': q, 'sort': sort})
        else:
            for c, s in zip(connector, sort_w_keywords):
                q = 'bibstem:arxiv ({0}{1}({2})) entdate:["{3}Z00:00" TO "{4}Z23:59"] pubdate:[{5}-00 TO *]'.\
                    format(classes, c, keywords, start_date, end_date, beg_pubyear)
                sort = s
                out.append({'q': q, 'sort': sort})
    elif template_type == 'citations':
        keywords = data
        q = 'citations({0})'.format(keywords)
        sort = 'entry_date desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
    elif template_type == 'authors':
        keywords = data
        start_date = (get_date() - datetime.timedelta(days=weekly_time_range)).date()
        if start_isodate and (start_isodate < start_date):
            start_date = start_isodate
        q = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
            format(keywords, start_date, end_date, beg_pubyear)
        sort = 'score desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
    elif template_type == 'keyword':
        keywords = data
        start_date = (get_date() - datetime.timedelta(days=weekly_time_range)).date()
        if start_isodate and (start_isodate < start_date):
            start_date = start_isodate
        # most recent
        q = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
            format(keywords, start_date, end_date, beg_pubyear)
        sort = 'entry_date desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
        # most popular
        q = 'trending({0})'.format(keywords)
        sort = 'score desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
        # most cited
        q = 'useful({0})'.format(keywords)
        sort = 'score desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
    elif template_type is None and data:
        # General query - for consistency with the rest of the templates,
        # remove lists such as:
        # {u'fq': [u'{!type=aqp v=$fq_database}'],
        #  u'fq_database': [u'(database:astronomy)'],
        #  u'q': [u'star'],
        #  u'sort': [u'citation_count desc, bibcode desc']}
        # but only if there is only one element
        general = {k: v[0] if isinstance(v, (list, tuple)) and len(v) == 1 else v
                   for k, v in list(data.items())}
        if 'q' in general:
            general['q'] = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
                format(general['q'], start_date, end_date, beg_pubyear)
        out.append(general)

    return out
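# A sketch of what _create_myads_query() produces for a weekly 'keyword'
# template; the dates are placeholders, computed from get_date() at run time.
queries = _create_myads_query('keyword', 'weekly', 'exoplanets')
# queries ~= [
#   {'q': 'exoplanets entdate:["2019-01-01Z00:00" TO "2019-01-07Z23:59"] pubdate:[2018-00 TO *]',
#    'sort': 'entry_date desc, bibcode desc'},
#   {'q': 'trending(exoplanets)', 'sort': 'score desc, bibcode desc'},
#   {'q': 'useful(exoplanets)', 'sort': 'score desc, bibcode desc'},
# ]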
def test_template_query(self):
    '''Tests storage and retrieval of templated myADS queries'''
    httpretty.register_uri(
        httpretty.GET, self.app.config.get('VAULT_SOLR_QUERY_ENDPOINT'),
        content_type='application/json',
        status=200,
        body="""{"responseHeader":{
                   "status":0, "QTime":0,
                   "params":{ "fl":"title,bibcode", "indent":"true", "wt":"json", "q":"*:*"}},
                 "response":{"numFound":10456930,"start":0,"docs":[
                   { "bibcode":"2005JGRC..110.4002G" },
                   { "bibcode":"2005JGRC..110.4003N" },
                   { "bibcode":"2005JGRC..110.4004Y" }]}}""")

    now = adsmutils.get_date().date()
    beg_pubyear = (now - datetime.timedelta(days=180)).year

    with self.app.session_scope() as session:
        r = session.query(User).filter_by(id=4).first()
        self.assertIsNone(r)

    # try to store a query with insufficient metadata
    r = self.client.post(url_for('user.myads_notifications'),
                         headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                         data=json.dumps({'data': 'keyword1 OR keyword2'}),
                         content_type='application/json')
    self.assertStatus(r, 400)

    # try to store a query with a data keyword of the wrong type (also insufficient metadata)
    r = self.client.post(url_for('user.myads_notifications'),
                         headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                         data=json.dumps({'data': 123}),
                         content_type='application/json')
    self.assertStatus(r, 400)

    # try to store a query with the classes keyword of the wrong type
    r = self.client.post(url_for('user.myads_notifications'),
                         headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                         data=json.dumps({'type': 'template',
                                          'template': 'arxiv',
                                          'classes': 'astro-ph',
                                          'data': 'keyword1 OR keyword2'}),
                         content_type='application/json')
    self.assertStatus(r, 400)

    # store a query correctly
    r = self.client.post(url_for('user.myads_notifications'),
                         headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                         data=json.dumps({'type': 'template',
                                          'template': 'keyword',
                                          'data': 'keyword1 OR keyword2'}),
                         content_type='application/json')
    self.assertStatus(r, 200)
    query_id = r.json['id']

    # test that the pipeline export works as expected
    r = self.client.get(url_for('user.get_myads', user_id='4'),
                        headers={'Authorization': 'secret'})
    start_date = (adsmutils.get_date() - datetime.timedelta(days=25)).date()
    self.assertStatus(r, 200)
    self.assertEquals(r.json[0]['id'], query_id)
    self.assertEquals(r.json[0]['name'], 'keyword1, etc.')
    self.assertTrue(r.json[0]['active'])
    self.assertFalse(r.json[0]['stateful'])
    self.assertEquals(r.json[0]['frequency'], 'weekly')
    self.assertEquals(r.json[0]['type'], 'template')
    self.assertEquals(r.json[0]['template'], 'keyword')
    self.assertEquals(r.json[0]['data'], 'keyword1 OR keyword2')

    # try to retrieve a query without a user ID in the headers
    r = self.client.get(url_for('user.myads_notifications', myads_id=query_id),
                        headers={'Authorization': 'secret'})
    self.assertStatus(r, 400)

    # successfully retrieve a query setup
    r = self.client.get(url_for('user.myads_notifications', myads_id=query_id),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'})
    self.assertStatus(r, 200)
    self.assertEquals(r.json[0]['id'], query_id)
    self.assertEquals(r.json[0]['name'], 'keyword1, etc.')
    self.assertTrue(r.json[0]['active'])
    self.assertFalse(r.json[0]['stateful'])
    self.assertEquals(r.json[0]['frequency'], 'weekly')
    self.assertEquals(r.json[0]['type'], 'template')

    # successfully delete the query setup
    r = self.client.delete(url_for('user.myads_notifications', myads_id=query_id),
                           headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'})
    self.assertStatus(r, 204)

    # ensure the query is really deleted
    with self.app.session_scope() as session:
        q = session.query(MyADS).filter_by(id=query_id).first()
        self.assertIsNone(q)

    # ensure the get returns the right status for a missing query
    r = self.client.get(url_for('user.myads_notifications', myads_id=query_id),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'})
    self.assertStatus(r, 404)

    # save an arxiv template query successfully
    r = self.client.post(url_for('user.myads_notifications'),
                         headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                         data=json.dumps({'type': 'template',
                                          'template': 'arxiv',
                                          'data': 'keyword1 OR keyword2',
                                          'classes': ['astro-ph']}),
                         content_type='application/json')
    self.assertStatus(r, 200)
    query_id = r.json['id']

    # check the stored query via the pipeline export
    r = self.client.get(url_for('user.get_myads', user_id='4'),
                        headers={'Authorization': 'secret'})
    if adsmutils.get_date().weekday() == 0:
        start_date = (adsmutils.get_date() - datetime.timedelta(days=2)).date()
    else:
        start_date = adsmutils.get_date().date()
    self.assertStatus(r, 200)
    self.assertEquals(r.json[0]['id'], query_id)
    self.assertEquals(r.json[0]['name'], 'keyword1, etc. - Recent Papers')
    self.assertFalse(r.json[0]['stateful'])
    self.assertEquals(r.json[0]['type'], 'template')
    self.assertTrue(r.json[0]['active'])
    self.assertEquals(r.json[0]['frequency'], 'daily')
    self.assertEquals(r.json[0]['template'], 'arxiv')
    self.assertEquals(r.json[0]['data'], 'keyword1 OR keyword2')
    self.assertEquals(r.json[0]['classes'], [u'astro-ph'])

    # edit the stored query
    r = self.client.put(url_for('user.myads_notifications', myads_id=query_id),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                        data=json.dumps({'type': 'template',
                                         'template': 'arxiv',
                                         'data': 'keyword1 OR keyword2 OR keyword3',
                                         'classes': ['astro-ph']}),
                        content_type='application/json')
    self.assertStatus(r, 200)

    # check editing the query name
    r = self.client.put(url_for('user.myads_notifications', myads_id=query_id),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                        data=json.dumps({'type': 'template',
                                         'template': 'arxiv',
                                         'name': 'keyword1, etc. - Recent Papers',
                                         'data': 'keyword2 OR keyword3',
                                         'classes': ['astro-ph']}),
                        content_type='application/json')
    self.assertStatus(r, 200)
    # a name was provided, but it was constructed, so the name should be updated
    self.assertEquals(r.json['name'], 'keyword2, etc. - Recent Papers')

    r = self.client.put(url_for('user.myads_notifications', myads_id=query_id),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                        data=json.dumps({'type': 'template',
                                         'template': 'arxiv',
                                         'name': 'test query',
                                         'data': 'keyword2 OR keyword3',
                                         'classes': ['astro-ph']}),
                        content_type='application/json')
    self.assertStatus(r, 200)
    # a non-constructed name was provided - use that
    self.assertEquals(r.json['name'], 'test query')

    r = self.client.put(url_for('user.myads_notifications', myads_id=query_id),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                        data=json.dumps({'type': 'template',
                                         'template': 'arxiv',
                                         'data': 'keyword1 OR keyword2 OR keyword3',
                                         'classes': ['astro-ph']}),
                        content_type='application/json')
    self.assertStatus(r, 200)
    # no name is provided, so keep the old provided name
    self.assertEquals(r.json['name'], 'test query')

    # check the exported setup
    r = self.client.get(url_for('user.get_myads', user_id='4'),
                        headers={'Authorization': 'secret'})
    self.assertStatus(r, 200)
    self.assertEquals(r.json[0]['id'], query_id)
    self.assertEquals(r.json[0]['name'], 'test query')
    self.assertFalse(r.json[0]['stateful'])
    self.assertEquals(r.json[0]['type'], 'template')
    self.assertTrue(r.json[0]['active'])
    self.assertEquals(r.json[0]['frequency'], 'daily')
    self.assertEquals(r.json[0]['template'], 'arxiv')
    self.assertEquals(r.json[0]['data'], 'keyword1 OR keyword2 OR keyword3')
    self.assertEquals(r.json[0]['classes'], ['astro-ph'])

    # add a second query
    r = self.client.post(url_for('user.myads_notifications'),
                         headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                         data=json.dumps({'type': 'template',
                                          'template': 'authors',
                                          'data': 'author:"Kurtz, M."'}),
                         content_type='application/json')
    self.assertStatus(r, 200)
    self.assertEquals(r.json['name'], 'Favorite Authors - Recent Papers')

    # get all queries back
    r = self.client.get(url_for('user.myads_notifications'),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'})
    self.assertStatus(r, 200)
    self.assertEquals(r.json[0]['name'], 'test query')
    self.assertEquals(r.json[1]['name'], 'Favorite Authors - Recent Papers')

    # save an arXiv query without keywords
    r = self.client.post(url_for('user.myads_notifications'),
                         headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                         data=json.dumps({'type': 'template',
                                          'template': 'arxiv',
                                          'classes': ['cs']}),
                         content_type='application/json')
    self.assertStatus(r, 200)
    self.assertEquals(r.json['data'], None)

    r = self.client.post(url_for('user.myads_notifications'),
                         headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                         data=json.dumps({'type': 'template',
                                          'data': '',
                                          'template': 'arxiv',
                                          'classes': ['hep-ex']}),
                         content_type='application/json')
    self.assertStatus(r, 200)
    self.assertEquals(r.json['data'], None)

    # test a blank arXiv query
    r = self.client.post(url_for('user.myads_notifications'),
                         headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                         data=json.dumps({'type': 'template',
                                          'template': 'arxiv',
                                          'classes': ['astro-ph']}),
                         content_type='application/json')
    self.assertStatus(r, 200)
    query_id = r.json['id']

    # make sure it's editable
    r = self.client.put(url_for('user.myads_notifications', myads_id=query_id),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                        data=json.dumps({'type': 'template',
                                         'template': 'arxiv',
                                         'active': False}),
                        content_type='application/json')
    self.assertStatus(r, 200)

    r = self.client.put(url_for('user.myads_notifications', myads_id=query_id),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                        data=json.dumps({'type': 'template',
                                         'template': 'arxiv',
                                         'data': 'keyword1',
                                         'classes': ['astro-ph']}),
                        content_type='application/json')
    self.assertStatus(r, 200)

    # test the citation query construction
    r = self.client.post(url_for('user.myads_notifications'),
                         headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                         data=json.dumps({'type': 'template',
                                          'template': 'citations',
                                          'data': 'author:"Kurtz, Michael"'}),
                         content_type='application/json')
    self.assertStatus(r, 200)
    query_id = r.json['id']

    r = self.client.get(url_for('user.myads_notifications', myads_id=query_id),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'})
    self.assertStatus(r, 200)
    self.assertEquals(r.json[0]['id'], query_id)
    self.assertEquals(r.json[0]['name'], 'author:"Kurtz, Michael" - Citations')
    self.assertTrue(r.json[0]['active'])
    self.assertTrue(r.json[0]['stateful'])
    self.assertEquals(r.json[0]['frequency'], 'weekly')
    self.assertEquals(r.json[0]['type'], 'template')

    # test the author query construction
    r = self.client.post(url_for('user.myads_notifications'),
                         headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'},
                         data=json.dumps({'type': 'template',
                                          'template': 'authors',
                                          'data': 'author:"Kurtz, Michael"'}),
                         content_type='application/json')
    self.assertStatus(r, 200)
    query_id = r.json['id']

    r = self.client.get(url_for('user.myads_notifications', myads_id=query_id),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': '4'})
    start_date = (adsmutils.get_date() - datetime.timedelta(days=25)).date()
    self.assertStatus(r, 200)
    self.assertEquals(r.json[0]['id'], query_id)
    self.assertEquals(r.json[0]['name'], 'Favorite Authors - Recent Papers')
    self.assertTrue(r.json[0]['active'])
    self.assertTrue(r.json[0]['stateful'])
    self.assertEquals(r.json[0]['frequency'], 'weekly')
    self.assertEquals(r.json[0]['type'], 'template')
def post(self, library):
    """
    HTTP POST request that conducts operations at the library level.

    :param library: primary library ID
    :return: response if operation was successful

    Header:
    -------
    Must contain the API forwarded user ID of the user accessing the end point

    Post body:
    ----------
    KEYWORD, VALUE

    libraries: <list> List of secondary libraries to include in the action
                      (optional, based on action)
    action: <unicode> union, intersection, difference, copy, empty
            Actions to perform on given libraries:
              Union: requires one or more secondary libraries to be passed;
                takes the union of the primary and secondary library sets;
                a new library is created
              Intersection: requires one or more secondary libraries to be
                passed; takes the intersection of the primary and secondary
                library sets; a new library is created
              Difference: requires one or more secondary libraries to be
                passed; takes the difference between the primary and
                secondary libraries; the primary library comes first in the
                operation, so the secondary library is removed from the
                primary; a new library is created
              Copy: requires one and only one secondary library to be
                passed; the primary library will be copied into the
                secondary library (so the secondary library will be
                overwritten); no new library is created
              Empty: secondary libraries are ignored; the primary library
                will be emptied of its contents, though the library and
                metadata will remain; no new library is created
    name: <string> (optional) name of the new library (must be unique for
                   that user); used only for actions in
                   [union, intersection, difference]
    description: <string> (optional) description of the new library; used
                   only for actions in [union, intersection, difference]
    public: <boolean> (optional) is the new library public to view; used
                   only for actions in [union, intersection, difference]

    -----------
    Return data:
    -----------
    name: <string> Name of the library
    id: <string> ID of the library
    description: <string> Description of the library

    Permissions:
    -----------
    The following type of user can conduct library operations:
      - owner
      - admin
      - write
    """
    # Get the user requesting this from the header
    try:
        user_editing = self.helper_get_user_id()
    except KeyError:
        return err(MISSING_USERNAME_ERROR)

    # URL safe base64 string to UUID
    try:
        library_uuid = self.helper_slug_to_uuid(library)
    except TypeError:
        return err(BAD_LIBRARY_ID_ERROR)

    user_editing_uid = \
        self.helper_absolute_uid_to_service_uid(absolute_uid=user_editing)

    # Check the permissions of the user
    if not self.write_access(service_uid=user_editing_uid,
                             library_id=library_uuid):
        return err(NO_PERMISSION_ERROR)

    try:
        data = get_post_data(request,
                             types=dict(libraries=list,
                                        action=str,
                                        name=str,
                                        description=str,
                                        public=bool))
    except TypeError as error:
        current_app.logger.error(
            'Wrong type passed for POST: {0} [{1}]'.format(request.data, error))
        return err(WRONG_TYPE_ERROR)

    if data['action'] in ['union', 'intersection', 'difference']:
        if 'libraries' not in data:
            return err(NO_LIBRARY_SPECIFIED_ERROR)
        if 'name' not in data:
            data['name'] = 'Untitled {0}.'.format(get_date().isoformat())
        if 'public' not in data:
            data['public'] = False

    if data['action'] == 'copy':
        if 'libraries' not in data:
            return err(NO_LIBRARY_SPECIFIED_ERROR)
        if len(data['libraries']) > 1:
            return err(TOO_MANY_LIBRARIES_SPECIFIED_ERROR)

    lib_names = []
    with current_app.session_scope() as session:
        primary = session.query(Library).filter_by(id=library_uuid).one()
        lib_names.append(primary.name)
        if 'libraries' in data:
            for lib in data['libraries']:
                try:
                    secondary_uuid = self.helper_slug_to_uuid(lib)
                except TypeError:
                    return err(BAD_LIBRARY_ID_ERROR)
                secondary = session.query(Library).filter_by(
                    id=secondary_uuid).one()
                lib_names.append(secondary.name)

    if data['action'] == 'union':
        bib_union = self.setops_libraries(library_id=library_uuid,
                                          document_data=data,
                                          operation='union')
        current_app.logger.info(
            'Successfully took the union of the libraries {0} (IDs: {1}, {2})'
            .format(', '.join(lib_names), library, ', '.join(data['libraries'])))
        data['bibcode'] = bib_union

        if 'description' not in data:
            description = 'Union of libraries {0} (IDs: {1}, {2})' \
                .format(', '.join(lib_names), library, ', '.join(data['libraries']))
            # field length capped in the model
            if len(description) > 200:
                description = 'Union of library {0} (ID: {1}) with {2} other libraries' \
                    .format(lib_names[0], library, len(lib_names[1:]))
            data['description'] = description

        try:
            library_dict = self.create_library(service_uid=user_editing_uid,
                                               library_data=data)
        except BackendIntegrityError as error:
            current_app.logger.error(error)
            return err(DUPLICATE_LIBRARY_NAME_ERROR)
        except TypeError as error:
            current_app.logger.error(error)
            return err(WRONG_TYPE_ERROR)

        return library_dict, 200

    elif data['action'] == 'intersection':
        bib_intersect = self.setops_libraries(library_id=library_uuid,
                                              document_data=data,
                                              operation='intersection')
        current_app.logger.info(
            'Successfully took the intersection of the libraries {0} (IDs: {1}, {2})'
            .format(', '.join(lib_names), library, ', '.join(data['libraries'])))
        data['bibcode'] = bib_intersect

        if 'description' not in data:
            description = 'Intersection of {0} (IDs: {1}, {2})' \
                .format(', '.join(lib_names), library, ', '.join(data['libraries']))
            if len(description) > 200:
                description = 'Intersection of {0} (ID: {1}) with {2} other libraries' \
                    .format(lib_names[0], library, len(lib_names[1:]))
            data['description'] = description

        try:
            library_dict = self.create_library(service_uid=user_editing_uid,
                                               library_data=data)
        except BackendIntegrityError as error:
            current_app.logger.error(error)
            return err(DUPLICATE_LIBRARY_NAME_ERROR)
        except TypeError as error:
            current_app.logger.error(error)
            return err(WRONG_TYPE_ERROR)

        return library_dict, 200

    elif data['action'] == 'difference':
        bib_diff = self.setops_libraries(library_id=library_uuid,
                                         document_data=data,
                                         operation='difference')
        current_app.logger.info(
            'Successfully took the difference of {0} (ID {2}) - (minus) {1} (ID {3})'
            .format(lib_names[0], ', '.join(lib_names[1:]), library,
                    ', '.join(data['libraries'])))
        data['bibcode'] = bib_diff

        if 'description' not in data:
            data['description'] = 'Records that are in {0} (ID {2}) but not in {1} (ID {3})' \
                .format(lib_names[0], ', '.join(lib_names[1:]), library,
                        ', '.join(data['libraries']))

        try:
            library_dict = self.create_library(service_uid=user_editing_uid,
                                               library_data=data)
        except BackendIntegrityError as error:
            current_app.logger.error(error)
            return err(DUPLICATE_LIBRARY_NAME_ERROR)
        except TypeError as error:
            current_app.logger.error(error)
            return err(WRONG_TYPE_ERROR)

        return library_dict, 200

    elif data['action'] == 'copy':
        library_dict = self.copy_library(library_id=library_uuid,
                                         document_data=data)
        current_app.logger.info(
            'Successfully copied {0} (ID {2}) into {1} (ID {3})'.format(
                lib_names[0], lib_names[1], library, data['libraries'][0]))

        with current_app.session_scope() as session:
            libid = self.helper_slug_to_uuid(data['libraries'][0])
            library = session.query(Library).filter_by(id=libid).one()
            bib = library.get_bibcodes()
            library_dict['bibcode'] = bib

        return library_dict, 200

    elif data['action'] == 'empty':
        library_dict = self.empty_library(library_id=library_uuid)
        current_app.logger.info(
            'Successfully emptied {0} (ID {1}) of all records'.format(
                lib_names[0], library))

        with current_app.session_scope() as session:
            library = session.query(Library).filter_by(
                id=library_uuid).one()
            bib = library.get_bibcodes()
            library_dict['bibcode'] = bib

        return library_dict, 200

    else:
        current_app.logger.info('User requested a non-standard operation')
        return {}, 400
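# A sketch of a POST body for the union operation described in the docstring
# above; the library slug is a placeholder. A successful call returns the new
# library's name/id/description with HTTP 200.
payload = {
    'action': 'union',
    'libraries': ['<secondary-library-slug>'],
    'name': 'Merged library',
    'description': 'Union of two libraries',
    'public': False,
}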
def post(self, library):
    """
    HTTP POST request that conducts operations at the library level.

    :param library: primary library ID
    :return: response if operation was successful

    Header:
    -------
    Must contain the API forwarded user ID of the user accessing the end point

    Post body:
    ----------
    KEYWORD, VALUE

    libraries: <list> List of secondary libraries to include in the action
                      (optional, based on action)
    action: <unicode> union, intersection, difference, copy, empty
            Actions to perform on given libraries:
              Union: requires one or more secondary libraries to be passed;
                takes the union of the primary and secondary library sets;
                a new library is created
              Intersection: requires one or more secondary libraries to be
                passed; takes the intersection of the primary and secondary
                library sets; a new library is created
              Difference: requires one or more secondary libraries to be
                passed; takes the difference between the primary and
                secondary libraries; the primary library comes first in the
                operation, so the secondary library is removed from the
                primary; a new library is created
              Copy: requires one and only one secondary library to be
                passed; the primary library will be copied into the
                secondary library (so the secondary library will be
                overwritten); no new library is created
              Empty: secondary libraries are ignored; the primary library
                will be emptied of its contents, though the library and
                metadata will remain; no new library is created
    name: <string> (optional) name of the new library (must be unique for
                   that user); used only for actions in
                   [union, intersection, difference]
    description: <string> (optional) description of the new library; used
                   only for actions in [union, intersection, difference]
    public: <boolean> (optional) is the new library public to view; used
                   only for actions in [union, intersection, difference]

    -----------
    Return data:
    -----------
    name: <string> Name of the library
    id: <string> ID of the library
    description: <string> Description of the library

    Permissions:
    -----------
    The following type of user can conduct library operations:
      - owner
      - admin
      - write
    """
    # Get the user requesting this from the header
    try:
        user_editing = self.helper_get_user_id()
    except KeyError:
        return err(MISSING_USERNAME_ERROR)

    # URL safe base64 string to UUID
    library_uuid = self.helper_slug_to_uuid(library)

    user_editing_uid = \
        self.helper_absolute_uid_to_service_uid(absolute_uid=user_editing)

    # Check the permissions of the user
    if not self.write_access(service_uid=user_editing_uid,
                             library_id=library_uuid):
        return err(NO_PERMISSION_ERROR)

    try:
        data = get_post_data(
            request,
            types=dict(libraries=list,
                       action=basestring,
                       name=basestring,
                       description=basestring,
                       public=bool)
        )
    except TypeError as error:
        current_app.logger.error('Wrong type passed for POST: {0} [{1}]'
                                 .format(request.data, error))
        return err(WRONG_TYPE_ERROR)

    if data['action'] in ['union', 'intersection', 'difference']:
        if 'libraries' not in data:
            return err(NO_LIBRARY_SPECIFIED_ERROR)
        if 'name' not in data:
            data['name'] = 'Untitled {0}.'.format(get_date().isoformat())
        if 'public' not in data:
            data['public'] = False

    if data['action'] == 'copy':
        if 'libraries' not in data:
            return err(NO_LIBRARY_SPECIFIED_ERROR)
        if len(data['libraries']) > 1:
            return err(TOO_MANY_LIBRARIES_SPECIFIED_ERROR)

    lib_names = []
    with current_app.session_scope() as session:
        primary = session.query(Library).filter_by(id=library_uuid).one()
        lib_names.append(primary.name)
        if 'libraries' in data:
            for lib in data['libraries']:
                secondary_uuid = self.helper_slug_to_uuid(lib)
                secondary = session.query(Library).filter_by(id=secondary_uuid).one()
                lib_names.append(secondary.name)

    if data['action'] == 'union':
        bib_union = self.setops_libraries(
            library_id=library_uuid,
            document_data=data,
            operation='union'
        )
        current_app.logger.info('Successfully took the union of the libraries {0} (IDs: {1}, {2})'
                                .format(', '.join(lib_names), library, ', '.join(data['libraries'])))
        data['bibcode'] = bib_union

        if 'description' not in data:
            data['description'] = 'Union of libraries {0} (IDs: {1}, {2})' \
                .format(', '.join(lib_names), library, ', '.join(data['libraries']))

        try:
            library_dict = self.create_library(service_uid=user_editing_uid,
                                               library_data=data)
        except BackendIntegrityError as error:
            current_app.logger.error(error)
            return err(DUPLICATE_LIBRARY_NAME_ERROR)
        except TypeError as error:
            current_app.logger.error(error)
            return err(WRONG_TYPE_ERROR)

        return library_dict, 200

    elif data['action'] == 'intersection':
        bib_intersect = self.setops_libraries(
            library_id=library_uuid,
            document_data=data,
            operation='intersection'
        )
        current_app.logger.info('Successfully took the intersection of the libraries {0} (IDs: {1}, {2})'
                                .format(', '.join(lib_names), library, ', '.join(data['libraries'])))
        data['bibcode'] = bib_intersect

        if 'description' not in data:
            data['description'] = 'Intersection of {0} (IDs: {1}, {2})' \
                .format(', '.join(lib_names), library, ', '.join(data['libraries']))

        try:
            library_dict = self.create_library(service_uid=user_editing_uid,
                                               library_data=data)
        except BackendIntegrityError as error:
            current_app.logger.error(error)
            return err(DUPLICATE_LIBRARY_NAME_ERROR)
        except TypeError as error:
            current_app.logger.error(error)
            return err(WRONG_TYPE_ERROR)

        return library_dict, 200

    elif data['action'] == 'difference':
        bib_diff = self.setops_libraries(
            library_id=library_uuid,
            document_data=data,
            operation='difference'
        )
        current_app.logger.info('Successfully took the difference of {0} (ID {2}) - (minus) {1} (ID {3})'
                                .format(lib_names[0], ', '.join(lib_names[1:]), library,
                                        ', '.join(data['libraries'])))
        data['bibcode'] = bib_diff

        if 'description' not in data:
            data['description'] = 'Records that are in {0} (ID {2}) but not in {1} (ID {3})' \
                .format(lib_names[0], ', '.join(lib_names[1:]), library,
                        ', '.join(data['libraries']))

        try:
            library_dict = self.create_library(service_uid=user_editing_uid,
                                               library_data=data)
        except BackendIntegrityError as error:
            current_app.logger.error(error)
            return err(DUPLICATE_LIBRARY_NAME_ERROR)
        except TypeError as error:
            current_app.logger.error(error)
            return err(WRONG_TYPE_ERROR)

        return library_dict, 200

    elif data['action'] == 'copy':
        library_dict = self.copy_library(
            library_id=library_uuid,
            document_data=data
        )
        current_app.logger.info('Successfully copied {0} (ID {2}) into {1} (ID {3})'
                                .format(lib_names[0], lib_names[1], library,
                                        data['libraries'][0]))

        with current_app.session_scope() as session:
            libid = self.helper_slug_to_uuid(data['libraries'][0])
            library = session.query(Library).filter_by(id=libid).one()
            bib = library.get_bibcodes()
            library_dict['bibcode'] = bib

        return library_dict, 200

    elif data['action'] == 'empty':
        library_dict = self.empty_library(
            library_id=library_uuid
        )
        current_app.logger.info('Successfully emptied {0} (ID {1}) of all records'
                                .format(lib_names[0], library))

        with current_app.session_scope() as session:
            library = session.query(Library).filter_by(id=library_uuid).one()
            bib = library.get_bibcodes()
            library_dict['bibcode'] = bib

        return library_dict, 200

    else:
        current_app.logger.info('User requested a non-standard operation')
        return {}, 400
def test_myads_execute_notification(self):
    httpretty.register_uri(
        httpretty.GET, self.app.config.get('VAULT_SOLR_QUERY_ENDPOINT'),
        content_type='application/json',
        status=200,
        body="""{"responseHeader":{
                   "status":0, "QTime":0,
                   "params":{ "fl":"title,bibcode", "indent":"true", "wt":"json", "q":"*:*"}},
                 "response":{"numFound":10456930,"start":0,"docs":[
                   { "bibcode":"2005JGRC..110.4002G" },
                   { "bibcode":"2005JGRC..110.4003N" },
                   { "bibcode":"2005JGRC..110.4004Y" }]}}""")

    now = adsmutils.get_date().date()
    beg_pubyear = (now - datetime.timedelta(days=180)).year

    # can't use as an anonymous user
    user_id = self.app.config.get('BOOTSTRAP_USER_ID')
    r = self.client.get(url_for('user.execute_myads_query', myads_id=123),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': user_id})
    self.assertStatus(r, 400)

    user_id = 6
    r = self.client.post(url_for('user.myads_notifications'),
                         headers={'Authorization': 'secret', 'X-Adsws-Uid': user_id},
                         data=json.dumps({'type': 'template',
                                          'template': 'authors',
                                          'data': 'author:"Kurtz, Michael"'}),
                         content_type='application/json')
    self.assertStatus(r, 200)
    query_id = r.json['id']

    r = self.client.get(url_for('user.execute_myads_query', myads_id=query_id),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': user_id})
    start_date = (adsmutils.get_date() - datetime.timedelta(days=25)).date()
    self.assertStatus(r, 200)
    self.assertEquals(r.json, [{
        'q': 'author:"Kurtz, Michael" entdate:["{0}Z00:00" TO "{1}Z23:59"] '
             'pubdate:[{2}-00 TO *]'.format(start_date, now, beg_pubyear),
        'sort': 'score desc, bibcode desc'
    }])
def test_myads_retrieval(self):
    '''Tests pipeline retrieval of myADS setup and users'''
    now = adsmutils.get_date()

    with self.app.session_scope() as session:
        q = session.query(Query).first()
        qid = q.qid

    # make sure no setups exist
    r = self.client.get(url_for('user.myads_notifications'),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': '3'})
    self.assertStatus(r, 204)

    # try saving a query with bad data
    r = self.client.post(url_for('user.myads_notifications'),
                         headers={'Authorization': 'secret', 'X-Adsws-Uid': '3'},
                         data=json.dumps({'name': 'Query 1',
                                          'qid': qid,
                                          'stateful': True,
                                          'frequency': 'bad data',
                                          'type': 'query'}),
                         content_type='application/json')
    self.assertStatus(r, 400)

    # save the query correctly
    r = self.client.post(url_for('user.myads_notifications'),
                         headers={'Authorization': 'secret', 'X-Adsws-Uid': '3'},
                         data=json.dumps({'name': 'Query 1',
                                          'qid': qid,
                                          'stateful': True,
                                          'frequency': 'daily',
                                          'type': 'query'}),
                         content_type='application/json')
    self.assertStatus(r, 200)
    self.assertTrue(r.json['name'] == 'Query 1')
    self.assertTrue(r.json['active'])
    myads_id = r.json['id']

    # edit the query with bad data
    r = self.client.put(url_for('user.myads_notifications', myads_id=myads_id),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': '3'},
                        data=json.dumps({'name': 'Query 1 - edited',
                                         'stateful': 'bad data'}),
                        content_type='application/json')
    self.assertStatus(r, 400)

    # edit the query correctly
    r = self.client.put(url_for('user.myads_notifications', myads_id=myads_id),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': '3'},
                        data=json.dumps({'name': 'Query 1 - edited'}),
                        content_type='application/json')
    self.assertStatus(r, 200)
    self.assertEquals(r.json['name'], 'Query 1 - edited')

    # get all myADS setups via the pipeline endpoint
    r = self.client.get(url_for('user.get_myads', user_id='3'),
                        headers={'Authorization': 'secret'})
    self.assertStatus(r, 200)
    self.assertEquals(r.json[0]['name'], 'Query 1 - edited')
    self.assertEquals(r.json[0]['qid'], qid)
    self.assertTrue(r.json[0]['active'])
    self.assertTrue(r.json[0]['stateful'])
    self.assertEquals(r.json[0]['frequency'], 'daily')
    self.assertEquals(r.json[0]['type'], 'query')

    # get all myADS setups via the BBB endpoint
    r = self.client.get(url_for('user.myads_notifications'),
                        headers={'Authorization': 'secret', 'X-Adsws-Uid': '3'})
    self.assertStatus(r, 200)
    self.assertEquals(r.json[0]['name'], 'Query 1 - edited')
    self.assertTrue(r.json[0]['active'])
    self.assertEquals(r.json[0]['frequency'], 'daily')
    self.assertEquals(r.json[0]['type'], 'query')

    # fetch the active myADS users
    r = self.client.get(url_for('user.export', iso_datestring=now))
    self.assertStatus(r, 200)
    self.assertEquals(r.json, {'users': [3]})
def _create_myads_query(template_type, frequency, data, classes=None):
    """
    Creates a query based on the stored myADS setup (for templated queries only)

    :param frequency: daily or weekly
    :param data: keywords or other stored query template data
    :param classes: arXiv classes, only required for arXiv template queries

    :return: out: list of dicts; constructed query, dates are such that it's meant to be run today:
             [{q: query params, sort: sort string}]
    """
    out = []
    beg_pubyear = (get_date() - datetime.timedelta(days=180)).year
    end_date = get_date().date()

    if template_type == 'arxiv':
        if not classes:
            raise Exception('Classes must be provided for an arXiv templated query')
        if type(classes) != list:
            tmp = [classes]
        else:
            tmp = classes
        classes = 'arxiv_class:(' + ' OR '.join(
            [x + '.*' if '.' not in x else x for x in tmp]) + ')'
        keywords = data
        if frequency == 'daily':
            connector = [' ', ' NOT ']
            # keyword search should be sorted by score, "other recent" should be sorted by bibcode
            sort_w_keywords = ['score desc, bibcode desc', 'bibcode desc']
            # on Mondays, deal with the weekend properly
            if get_date().weekday() == 0:
                start_date = (get_date() - datetime.timedelta(days=2)).date()
            else:
                start_date = get_date().date()
        elif frequency == 'weekly':
            connector = [' ']
            sort_w_keywords = ['score desc, bibcode desc']
            start_date = (get_date() - datetime.timedelta(days=25)).date()
        if not keywords:
            q = 'bibstem:arxiv {0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
                format(classes, start_date, end_date, beg_pubyear)
            sort = 'bibcode desc'
            out.append({'q': q, 'sort': sort})
        else:
            for c, s in zip(connector, sort_w_keywords):
                q = 'bibstem:arxiv ({0}{1}({2})) entdate:["{3}Z00:00" TO "{4}Z23:59"] pubdate:[{5}-00 TO *]'.\
                    format(classes, c, keywords, start_date, end_date, beg_pubyear)
                sort = s
                out.append({'q': q, 'sort': sort})
    elif template_type == 'citations':
        keywords = data
        q = 'citations({0})'.format(keywords)
        sort = 'entry_date desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
    elif template_type == 'authors':
        keywords = data
        start_date = (get_date() - datetime.timedelta(days=25)).date()
        q = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
            format(keywords, start_date, end_date, beg_pubyear)
        sort = 'score desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
    elif template_type == 'keyword':
        keywords = data
        start_date = (get_date() - datetime.timedelta(days=25)).date()
        # most recent
        q = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
            format(keywords, start_date, end_date, beg_pubyear)
        sort = 'entry_date desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
        # most popular
        q = 'trending({0})'.format(keywords)
        sort = 'score desc, bibcode desc'
        out.append({'q': q, 'sort': sort})
        # most cited
        q = 'useful({0})'.format(keywords)
        sort = 'score desc, bibcode desc'
        out.append({'q': q, 'sort': sort})

    return out