def _get_coauthors_fallback(personid, collabs):
    # python 2.4 does not support max() with key argument.
    # Please remove this function when python 2.6 is supported.
    def max_key(iterable, key):
        try:
            ret = iterable[0]
        except IndexError:
            return None
        for i in iterable[1:]:
            if key(i) > key(ret):
                ret = i
        return ret

    if collabs:
        query = 'exactauthor:"%s" and (%s)' % (personid, ' or '.join(
            [('collaboration:"%s"' % x) for x in zip(*collabs)[0]]))
        exclude_recs = perform_request_search(rg=0, p=query)
    else:
        exclude_recs = []
    recids = perform_request_search(rg=0, p='exactauthor:"%s"' % str(personid))
    recids = list(set(recids) - set(exclude_recs))
    a = format_records(recids, 'WAPAFF')
    a = [pickle.loads(p) for p in a.split('!---THEDELIMITER---!') if p]
    coauthors = {}
    for rec, affs in a:
        keys = affs.keys()
        for n in keys:
            try:
                coauthors[n].add(rec)
            except KeyError:
                coauthors[n] = set([rec])
    coauthors = [(x, x, len(coauthors[x])) for x in coauthors
                 if x.lower() != personid.lower()]
    return coauthors
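# Illustrative sketch, not part of the original module: the try/except
# KeyError pattern above is a python 2.4-friendly way to group record ids
# into sets per author name (what dict.setdefault or collections.defaultdict
# would do on newer pythons). The data shape below is made up for the example.
def _group_records_by_name_example(rows):
    """Group record ids by author name.

    E.g. [(1, {'Ellis, J': ('CERN',)}), (2, {'Ellis, J': ('CERN',)})]
    -> {'Ellis, J': set([1, 2])}.
    """
    grouped = {}
    for rec, affs in rows:
        for name in affs.keys():
            try:
                grouped[name].add(rec)
            except KeyError:
                grouped[name] = set([rec])
    return grouped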
def response_formated_records(records, collection, of, **kwargs):
    """Return formatted records.

    Response contains correct Cache and TTL information in HTTP headers.
    """
    response = make_response(
        format_records(records, collection=collection, of=of, **kwargs))
    response.mimetype = get_output_format_content_type(of)
    current_time = datetime.datetime.now()
    response.headers['Last-Modified'] = http_date(
        time.mktime(current_time.timetuple()))
    expires = current_app.config.get(
        'CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT', None)

    if expires is None:
        response.headers['Cache-Control'] = (
            'no-store, no-cache, must-revalidate, '
            'post-check=0, pre-check=0, max-age=0')
        response.headers['Expires'] = '-1'
    else:
        expires_time = current_time + datetime.timedelta(seconds=expires)
        response.headers['Vary'] = 'Accept'
        response.headers['Cache-Control'] = (
            'public' if current_user.is_guest else 'private')
        response.headers['Expires'] = http_date(time.mktime(
            expires_time.timetuple()))
    return response
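# Hedged sketch (assumption, not part of the original module): the caching
# decision above, isolated from the Flask response object. Given the timeout
# (seconds, or None for "never cache") and the guest flag, it returns the
# header values response_formated_records would set. The werkzeug import is
# an assumption about where http_date comes from in this codebase.
import datetime
import time

from werkzeug.http import http_date


def _cache_headers_example(expires, is_guest, now=None):
    """Return the Cache-Control/Expires headers for a search response."""
    now = now or datetime.datetime.now()
    if expires is None:
        # No caching allowed: belt-and-braces set of anti-cache directives.
        return {'Cache-Control': ('no-store, no-cache, must-revalidate, '
                                  'post-check=0, pre-check=0, max-age=0'),
                'Expires': '-1'}
    expires_time = now + datetime.timedelta(seconds=expires)
    # Guests share a public cache entry; logged-in users get private ones.
    return {'Vary': 'Accept',
            'Cache-Control': 'public' if is_guest else 'private',
            'Expires': http_date(time.mktime(expires_time.timetuple()))}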
def _get_institute_pub_dict_fallback(recids, names_list, person_id):
    """Return a dictionary mapping institute -> set of publication recids."""
    recids = perform_request_search(rg=0, p='exactauthor:"%s"' % str(person_id))
    a = format_records(recids, 'WAPAFF')
    a = [pickle.loads(p) for p in a.split('!---THEDELIMITER---!') if p]
    affdict = {}
    for rec, affs in a:
        keys = affs.keys()
        for name in names_list:
            if name in keys and affs[name][0]:
                try:
                    affdict[affs[name][0]].add(rec)
                except KeyError:
                    affdict[affs[name][0]] = set([rec])
    return affdict
def _get_institute_pub_dict_bai(recids, names_list, person_id):
    """Return a dictionary mapping institute -> set of publication recids."""
    try:
        cid = get_canonical_id_from_personid(person_id)[0][0]
    except IndexError:
        cid = person_id
    recids = perform_request_search(rg=0, p='author:%s' % str(cid))
    a = format_records(recids, 'WAPAFF')
    a = [pickle.loads(p) for p in a.split('!---THEDELIMITER---!') if p]
    affdict = {}
    for rec, affs in a:
        keys = affs.keys()
        for name in names_list:
            if name in keys and affs[name][0]:
                try:
                    affdict[affs[name][0]].add(rec)
                except KeyError:
                    affdict[affs[name][0]] = set([rec])
    return affdict
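# Illustrative sketch (assumption, not part of the original module): the two
# _get_institute_pub_dict_* variants above differ only in how they search for
# recids; the WAPAFF post-processing is identical. A standalone version of
# that shared step, taking the raw formatter output as a string:
import pickle


def _group_by_institute_example(wapaff_blob, names_list):
    """Split a '!---THEDELIMITER---!'-separated pickle stream and map each
    matching author's first affiliation to the set of record ids."""
    parts = [pickle.loads(p) for p in
             wapaff_blob.split('!---THEDELIMITER---!') if p]
    affdict = {}
    for rec, affs in parts:
        for name in names_list:
            # affs maps author name -> tuple of affiliations; only the
            # first (non-empty) affiliation is used as the grouping key.
            if name in affs and affs[name][0]:
                affdict.setdefault(affs[name][0], set()).add(rec)
    return affdict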
def cvify_records(recids, of, req=None, so='d'):
    """
    Write a CV for records RECIDS in the format OF.
    REQ is the Apache/mod_python request object.
    """
    # intbitsets don't support indexing, so we need a list from our hitset first
    recids = [hit for hit in recids]
    if so == 'd':
        recids.reverse()
    if of.startswith('h'):
        if of == 'hcv':
            format_records(recids, of=of,
                           record_prefix=lambda count: '%d) ' % (count + 1),
                           req=req)
        elif of == 'htcv':
            format_records(recids, of=of,
                           record_prefix=lambda count: '%d) ' % (count + 1),
                           req=req)
    elif of == 'tlcv':
        HEADER = r'''
\documentclass{article}
%%To use pdflatex, uncomment these lines, as well as the \href lines
%%in each entry
%%\usepackage[pdftex,
%%       colorlinks=true,
%%       urlcolor=blue,       %% \href{...}{...} external (URL)
%%       filecolor=green,     %% \href{...} local file
%%       linkcolor=red,       %% \ref{...} and \pageref{...}
%%       pdftitle={Papers by AUTHOR},
%%       pdfauthor={Your Name},
%%       pdfsubject={Just a test},
%%       pdfkeywords={test testing testable},
%%       pagebackref,
%%       pdfpagemode=None,
%%       bookmarksopen=true]{hyperref}
%%usepackage{arial}
%%\renewcommand{\familydefault}{\sfdefault} %% Sans serif
\renewcommand{\labelenumii}{\arabic{enumi}.\arabic{enumii}}
\pagestyle{empty}
\oddsidemargin 0.0in
\textwidth 6.5in
\topmargin -0.75in
\textheight 9.5in
\begin{document}
\title{Papers by AUTHOR}
\author{}
\date{}
\maketitle
\begin{enumerate}
%%%% LIST OF PAPERS
%%%% Please comment out anything between here and the
%%%% first \item
%%%% Please send any updates or corrections to the list to
%%%% %(email)s
''' % {'email': CFG_SITE_SUPPORT_EMAIL}
        FOOTER = r'''
\end{enumerate}
\end{document}
'''
        format_records(recids, of=of, prologue=HEADER, epilogue=FOOTER, req=req)
    return ''
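# Illustrative sketch (assumption, not part of the original module): HEADER
# above is filled in with old-style %-formatting against a dict, so every
# literal percent sign in the LaTeX template must be doubled ('%%'), which is
# why the LaTeX comments are written as '%%...' and render as '%...' after
# substitution. The email address below is made up.
_template_example = r'''%%%% send corrections to %(email)s''' % {
    'email': 'support@example.org'}
# _template_example == '%% send corrections to support@example.org'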
def search(self, read_cache=True, **kwparams):
    """
    Return records corresponding to the given search query.

    See docstring of invenio.legacy.search_engine.perform_request_search()
    for an overview of available parameters.

    @raise InvenioConnectorAuthError: if authentication fails
    """
    parse_results = False
    of = kwparams.get('of', "")
    if of == "":
        parse_results = True
        of = "xm"
        kwparams['of'] = of
    params = urllib.urlencode(kwparams, doseq=1)

    # Are we running locally? If so, better access the search engine directly
    if self.local and of != 't':
        # See if the user tries to search any restricted collection
        c = kwparams.get('c', "")
        if c != "":
            if type(c) is list:
                colls = c
            else:
                colls = [c]
            for collection in colls:
                if collection_restricted_p(collection):
                    if self.user:
                        self._check_credentials()
                        continue
                    raise InvenioConnectorAuthError("You are trying to search a restricted collection. Please authenticate yourself.\n")
        kwparams['of'] = 'id'
        results = perform_request_search(**kwparams)
        if of.lower() != 'id':
            results = format_records(results, of)
    else:
        if params + str(parse_results) not in self.cached_queries or not read_cache:
            if self.user:
                results = self.browser.open(self.server_url + "/search?" + params)
            else:
                results = urllib2.urlopen(self.server_url + "/search?" + params)
            if 'youraccount/login' in results.geturl():
                # Current user not able to search collection
                raise InvenioConnectorAuthError("You are trying to search a restricted collection. Please authenticate yourself.\n")
        else:
            return self.cached_queries[params + str(parse_results)]

    if parse_results:
        # FIXME: we should not try to parse if results is a string
        parsed_records = self._parse_results(results, self.cached_records)
        self.cached_queries[params + str(parse_results)] = parsed_records
        return parsed_records
    else:
        # pylint: disable=E1103
        # The whole point of the following code is to make sure we can
        # handle two types of variable.
        try:
            res = results.read()
        except AttributeError:
            res = results
        # pylint: enable=E1103

        if of == "id":
            try:
                if type(res) is str:
                    # Transform to list
                    res = [int(recid.strip()) for recid in
                           res.strip("[]").split(",") if recid.strip() != ""]
                res.reverse()
            except (ValueError, AttributeError):
                res = []
        self.cached_queries[params + str(parse_results)] = res
        return self.cached_queries[params + str(parse_results)]
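# Hedged usage sketch (assumption, not part of the original module): how the
# search() method above is typically driven. The connector instance and the
# query string are placeholders supplied by the caller.
def _connector_search_example(connector):
    # Empty 'of' means the results come back parsed ('xm' under the hood).
    parsed = connector.search(p='higgs boson', rg=5)
    # of='id' returns a plain list of record ids; read_cache=False bypasses
    # the per-instance query cache and forces a fresh request.
    recids = connector.search(p='higgs boson', of='id', read_cache=False)
    return parsed, recids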
def __call__(self, req, form):
    """RSS 2.0 feed service."""

    # Keep only interesting parameters for the search
    default_params = websearch_templates.rss_default_urlargd
    # We need to keep 'jrec' and 'rg' here in order to have
    # 'multi-page' RSS. These parameters are not kept by default
    # as we don't want to consider them when building RSS links
    # from search and browse pages.
    default_params.update({'jrec': (int, 1),
                           'rg': (int, CFG_WEBSEARCH_INSTANT_BROWSE_RSS)})
    argd = wash_urlargd(form, default_params)
    user_info = collect_user_info(req)

    for coll in argd['c'] + [argd['cc']]:
        if collection_restricted_p(coll):
            (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll)
            if auth_code and user_info['email'] == 'guest':
                cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection': coll})
                target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                    make_canonical_urlargd({'action': cookie,
                                            'ln': argd['ln'],
                                            'referer': CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
                return redirect_to_url(req, target, norobot=True)
            elif auth_code:
                return page_not_authorized(req, "../",
                                           text=auth_msg,
                                           navmenuid='search')

    # Create a standard filename with these parameters
    current_url = websearch_templates.build_rss_url(argd)
    cache_filename = current_url.split('/')[-1]
    # In the same way as previously, add 'jrec' & 'rg'

    req.content_type = "application/rss+xml"
    req.send_http_header()
    try:
        # Try to read from cache
        path = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename)
        # Check if cache needs refresh
        filedesc = open(path, "r")
        last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(path)).st_mtime)
        assert(datetime.datetime.now() < last_update_time + datetime.timedelta(minutes=CFG_WEBSEARCH_RSS_TTL))
        c_rss = filedesc.read()
        filedesc.close()
        req.write(c_rss)
        return
    except Exception:
        # Cache miss or stale cache: do it live and cache
        previous_url = None
        if argd['jrec'] > 1:
            prev_jrec = argd['jrec'] - argd['rg']
            if prev_jrec < 1:
                prev_jrec = 1
            previous_url = websearch_templates.build_rss_url(argd, jrec=prev_jrec)

        # Check if the user has rights to set a high wildcard limit;
        # if not, replace the limit set by the user with the default one.
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
            if acc_authorize_action(req, 'runbibedit')[0] != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

        req.argd = argd
        recIDs = perform_request_search(req, of="id",
                                        c=argd['c'], cc=argd['cc'],
                                        p=argd['p'], f=argd['f'],
                                        p1=argd['p1'], f1=argd['f1'],
                                        m1=argd['m1'], op1=argd['op1'],
                                        p2=argd['p2'], f2=argd['f2'],
                                        m2=argd['m2'], op2=argd['op2'],
                                        p3=argd['p3'], f3=argd['f3'],
                                        m3=argd['m3'], wl=argd['wl'])

        nb_found = len(recIDs)
        next_url = None
        if len(recIDs) >= argd['jrec'] + argd['rg']:
            next_url = websearch_templates.build_rss_url(argd, jrec=(argd['jrec'] + argd['rg']))

        first_url = websearch_templates.build_rss_url(argd, jrec=1)
        last_url = websearch_templates.build_rss_url(argd, jrec=nb_found - argd['rg'] + 1)

        recIDs = recIDs[-argd['jrec']:(-argd['rg'] - argd['jrec']):-1]

        rss_prologue = '<?xml version="1.0" encoding="UTF-8"?>\n' + \
            websearch_templates.tmpl_xml_rss_prologue(current_url=current_url,
                                                      previous_url=previous_url,
                                                      next_url=next_url,
                                                      first_url=first_url,
                                                      last_url=last_url,
                                                      nb_found=nb_found,
                                                      jrec=argd['jrec'],
                                                      rg=argd['rg'],
                                                      cc=argd['cc']) + '\n'
        req.write(rss_prologue)
        rss_body = format_records(recIDs,
                                  of='xr',
                                  ln=argd['ln'],
                                  user_info=user_info,
                                  record_separator="\n",
                                  req=req, epilogue="\n")
        rss_epilogue = websearch_templates.tmpl_xml_rss_epilogue() + '\n'
        req.write(rss_epilogue)

        # update cache
        dirname = "%s/rss" % (CFG_CACHEDIR)
        mymkdir(dirname)
        fullfilename = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename)
        try:
            # Remove the file just in case it already existed
            # so that a bit of space is created
            os.remove(fullfilename)
        except OSError:
            pass

        # Check if there's enough space to cache the request.
        if len(os.listdir(dirname)) < CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS:
            try:
                os.umask(0o022)
                f = open(fullfilename, "w")
                f.write(rss_prologue + rss_body + rss_epilogue)
                f.close()
            except IOError as v:
                if v[0] == 36:
                    # File name was too long (ENAMETOOLONG).
                    # Never mind, don't cache.
                    pass
                else:
                    raise
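# Illustrative sketch (assumption, not part of the original module): the
# paging slice used above, recIDs[-jrec:(-rg - jrec):-1], selects one RSS
# "page" of rg records counted from the newest end of the hit list, newest
# first. A standalone version with toy data:
def _rss_page_example(recids, jrec, rg):
    """Return the jrec-th page (1-based, rg records per page), newest first."""
    return recids[-jrec:(-rg - jrec):-1]

# _rss_page_example(range(1, 11), jrec=1, rg=3) -> [10, 9, 8]
# _rss_page_example(range(1, 11), jrec=4, rg=3) -> [7, 6, 5]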