Example #1
0
def _get_coauthors_fallback(personid, collabs):
    # python 2.4 does not support max() with key argument.
    # Please remove this function when python 2.6 is supported.
    def max_key(iterable, key):
        try:
            ret = iterable[0]
        except IndexError:
            return None
        for i in iterable[1:]:
            if key(i) > key(ret):
                ret = i
        return ret

    if collabs:
        query = 'exactauthor:"%s" and (%s)' % (personid, ' or '.join([('collaboration:"%s"' % x) for x in zip(*collabs)[0]]))
        exclude_recs = perform_request_search(rg=0, p=query)
    else:
        exclude_recs = []

    recids = perform_request_search(rg=0, p='exactauthor:"%s"' % str(personid))
    recids = list(set(recids) - set(exclude_recs))
    a = format_records(recids, 'WAPAFF')
    a = [pickle.loads(p) for p in a.split('!---THEDELIMITER---!') if p]
    coauthors = {}
    for rec, affs in a:
        keys = affs.keys()
        for n in keys:
            try:
                coauthors[n].add(rec)
            except KeyError:
                coauthors[n] = set([rec])

    coauthors = [(x, x, len(coauthors[x])) for x in coauthors if x.lower() != personid.lower()]
    return coauthors
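
The core of Example #1 is the aggregation of (record id, affiliations) pairs into a dictionary of record-id sets keyed by author name. Below is a minimal, self-contained sketch of that step, with mock data standing in for perform_request_search() and the unpickled 'WAPAFF' output; the helper name and the sample records are illustrative only, and the result is sorted here purely so the printed output is deterministic.

def count_coauthor_papers(parsed_records, personid):
    # parsed_records: iterable of (recid, {author name: [affiliations]}) pairs.
    coauthors = {}
    for rec, affs in parsed_records:
        for name in affs.keys():
            coauthors.setdefault(name, set()).add(rec)
    # One (name, name, number of shared papers) tuple per coauthor,
    # excluding the person whose profile is being built.
    return sorted((name, name, len(recs))
                  for name, recs in coauthors.items()
                  if name.lower() != personid.lower())

sample = [(1, {'Doe, J.': ['CERN'], 'Smith, A.': ['DESY'], 'Ellis, J.': ['CERN']}),
          (2, {'Doe, J.': ['CERN'], 'Ellis, J.': ['CERN']})]
print(count_coauthor_papers(sample, 'Ellis, J.'))
# [('Doe, J.', 'Doe, J.', 2), ('Smith, A.', 'Smith, A.', 1)]
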
Example #2
0
def response_formated_records(records, collection, of, **kwargs):
    """Return formatter records.

    Response contains correct Cache and TTL information in HTTP headers.
    """
    response = make_response(format_records(records, collection=collection,
                                            of=of, **kwargs))

    response.mimetype = get_output_format_content_type(of)
    current_time = datetime.datetime.now()
    response.headers['Last-Modified'] = http_date(
        time.mktime(current_time.timetuple())
    )
    expires = current_app.config.get(
        'CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT', None)

    if expires is None:
        response.headers['Cache-Control'] = (
            'no-store, no-cache, must-revalidate, '
            'post-check=0, pre-check=0, max-age=0'
        )
        response.headers['Expires'] = '-1'
    else:
        expires_time = current_time + datetime.timedelta(seconds=expires)
        response.headers['Vary'] = 'Accept'
        response.headers['Cache-Control'] = (
            'public' if current_user.is_guest else 'private'
        )
        response.headers['Expires'] = http_date(time.mktime(
            expires_time.timetuple()
        ))
    return response
Example #3
0
def response_formated_records(records, collection, of, **kwargs):
    """Return formatter records.

    Response contains correct Cache and TTL information in HTTP headers.
    """
    response = make_response(
        format_records(records, collection=collection, of=of, **kwargs))

    response.mimetype = get_output_format_content_type(of)
    current_time = datetime.datetime.now()
    response.headers['Last-Modified'] = http_date(
        time.mktime(current_time.timetuple()))
    expires = current_app.config.get('CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT',
                                     None)

    if expires is None:
        response.headers['Cache-Control'] = (
            'no-store, no-cache, must-revalidate, '
            'post-check=0, pre-check=0, max-age=0')
        response.headers['Expires'] = '-1'
    else:
        expires_time = current_time + datetime.timedelta(seconds=expires)
        response.headers['Vary'] = 'Accept'
        response.headers['Cache-Control'] = ('public' if current_user.is_guest
                                             else 'private')
        response.headers['Expires'] = http_date(
            time.mktime(expires_time.timetuple()))
    return response
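
Examples #2 and #3 are the same function and differ only in line wrapping. The cache policy they share can be exercised on its own; the sketch below assumes a plain dict instead of the Flask response object and uses email.utils.formatdate() from the standard library in place of werkzeug's http_date(), with an illustrative timeout and guest flag.

import datetime
import time
from email.utils import formatdate

def cache_headers(expires_seconds, is_guest):
    headers = {}
    now = datetime.datetime.now()
    headers['Last-Modified'] = formatdate(time.mktime(now.timetuple()),
                                          usegmt=True)
    if expires_seconds is None:
        # No cache timeout configured: forbid caching entirely.
        headers['Cache-Control'] = ('no-store, no-cache, must-revalidate, '
                                    'post-check=0, pre-check=0, max-age=0')
        headers['Expires'] = '-1'
    else:
        # Guest responses are the same for everyone and may be cached by
        # shared proxies; authenticated users get a private cache.
        expires_time = now + datetime.timedelta(seconds=expires_seconds)
        headers['Vary'] = 'Accept'
        headers['Cache-Control'] = 'public' if is_guest else 'private'
        headers['Expires'] = formatdate(time.mktime(expires_time.timetuple()),
                                        usegmt=True)
    return headers

print(cache_headers(600, is_guest=True))
print(cache_headers(None, is_guest=False))
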
Example #4
0
def _get_institute_pub_dict_fallback(recids, names_list, person_id):
    """return a dictionary consisting of institute -> list of publications"""

    recids = perform_request_search(rg=0, p='exactauthor:"%s"' % str(person_id))
    a = format_records(recids, 'WAPAFF')
    a = [pickle.loads(p) for p in a.split('!---THEDELIMITER---!') if p]
    affdict = {}
    for rec, affs in a:
        keys = affs.keys()
        for name in names_list:
            if name in keys and affs[name][0]:
                try:
                    affdict[affs[name][0]].add(rec)
                except KeyError:
                    affdict[affs[name][0]] = set([rec])
    return affdict
Example #5
0
def _get_institute_pub_dict_bai(recids, names_list, person_id):
    """return a dictionary consisting of institute -> list of publications"""
    try:
        cid = get_canonical_id_from_personid(person_id)[0][0]
    except IndexError:
        cid = person_id
    recids = perform_request_search(rg=0, p='author:%s' % str(cid))
    a = format_records(recids, 'WAPAFF')
    a = [pickle.loads(p) for p in a.split('!---THEDELIMITER---!') if p]
    affdict = {}
    for rec, affs in a:
        keys = affs.keys()
        for name in names_list:
            if name in keys and affs[name][0]:
                try:
                    affdict[affs[name][0]].add(rec)
                except KeyError:
                    affdict[affs[name][0]] = set([rec])
    return affdict
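
Examples #4 and #5 differ only in how the record ids are looked up (exact author name versus canonical BAI id); both then build the institute mapping with the try/except KeyError idiom needed on Python 2.4. On any later Python the same dict of sets can be built with collections.defaultdict, as in this small sketch over made-up data:

from collections import defaultdict

parsed = [(11, {'Ellis, J.': ['CERN']}),
          (12, {'Ellis, J.': ['CERN']}),
          (13, {'Ellis, J.': ['']})]    # empty affiliation: record is skipped
affdict = defaultdict(set)
for rec, affs in parsed:
    for name, institutes in affs.items():
        if institutes and institutes[0]:
            affdict[institutes[0]].add(rec)

print(dict(affdict))  # {'CERN': set([11, 12])} on Python 2
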
Example #6
0
def cvify_records(recids, of, req=None, so='d'):
    """
       Write a CV for the records RECIDS in the output format OF.
       REQ is the Apache/mod_python request object; SO is the sort order
       ('d' for descending).
    """
    # intbitsets don't support indexing, so we need a list from our hitset first
    recids = [hit for hit in recids]
    if so == 'd':
        recids.reverse()
    if of.startswith('h'):
        if of == 'hcv':
            format_records(recids, of=of,
                           record_prefix=lambda count: '%d) ' % (count+1),
                           req=req)
        elif of == 'htcv':
            format_records(recids, of=of,
                           record_prefix=lambda count: '%d) ' % (count+1),
                           req=req)

    elif of == 'tlcv':
        HEADER = r'''
\documentclass{article}
%%To use pdflatex, uncomment these lines, as well as the \href lines
%%in each entry
%%\usepackage[pdftex,
%%       colorlinks=true,
%%       urlcolor=blue,       %% \href{...}{...} external (URL)
%%       filecolor=green,     %% \href{...} local file
%%       linkcolor=red,       %% \ref{...} and \pageref{...}
%%       pdftitle={Papers by AUTHOR},
%%       pdfauthor={Your Name},
%%       pdfsubject={Just a test},
%%       pdfkeywords={test testing testable},
%%       pagebackref,
%%       pdfpagemode=None,
%%        bookmarksopen=true]{hyperref}
%%usepackage{arial}
%%\renewcommand{\familydefault}{\sfdefault} %% Sans serif
\renewcommand{\labelenumii}{\arabic{enumi}.\arabic{enumii}}

\pagestyle{empty}
\oddsidemargin 0.0in
\textwidth 6.5in
\topmargin -0.75in
\textheight 9.5in

\begin{document}
\title{Papers by AUTHOR}
\author{}
\date{}
\maketitle
\begin{enumerate}

%%%%   LIST OF PAPERS
%%%%   Please comment out anything between here and the
%%%%   first \item
%%%%   Please send any updates or corrections to the list to
%%%%   %(email)s
''' % { 'email' : CFG_SITE_SUPPORT_EMAIL, }
        FOOTER = r'''
\end{enumerate}
\end{document}
'''
        format_records(recids, of=of,
                       prologue=HEADER,
                       epilogue=FOOTER,
                       req=req)

    return ''
Example #7
0
    def search(self, read_cache=True, **kwparams):
        """
        Returns records corresponding to the given search query.

        See docstring of invenio.legacy.search_engine.perform_request_search()
        for an overview of available parameters.

        @raise InvenioConnectorAuthError: if authentication fails
        """
        parse_results = False
        of = kwparams.get('of', "")
        if of == "":
            parse_results = True
            of = "xm"
            kwparams['of'] = of
        params = urllib.urlencode(kwparams, doseq=1)

        # Are we running locally? If so, better to access the
        # search engine directly
        if self.local and of != 't':
            # See if user tries to search any restricted collection
            c = kwparams.get('c', "")
            if c != "":
                if type(c) is list:
                    colls = c
                else:
                    colls = [c]
                for collection in colls:
                    if collection_restricted_p(collection):
                        if self.user:
                            self._check_credentials()
                            continue
                        raise InvenioConnectorAuthError("You are trying to search a restricted collection. Please authenticate yourself.\n")
            kwparams['of'] = 'id'
            results = perform_request_search(**kwparams)
            if of.lower() != 'id':
                results = format_records(results, of)
        else:
            if params + str(parse_results) not in self.cached_queries or not read_cache:
                if self.user:
                    results = self.browser.open(self.server_url + "/search?" + params)
                else:
                    results = urllib2.urlopen(self.server_url + "/search?" + params)
                if 'youraccount/login' in results.geturl():
                    # Current user not able to search collection
                    raise InvenioConnectorAuthError("You are trying to search a restricted collection. Please authenticate yourself.\n")
            else:
                return self.cached_queries[params + str(parse_results)]

        if parse_results:
            # FIXME: we should not try to parse if results is a string
            parsed_records = self._parse_results(results, self.cached_records)
            self.cached_queries[params + str(parse_results)] = parsed_records
            return parsed_records
        else:
            # pylint: disable=E1103
            # The whole point of the following code is to make sure we can
            # handle both kinds of result: a file-like object or a plain value.
            try:
                res = results.read()
            except AttributeError:
                res = results
            # pylint: enable=E1103

            if of == "id":
                try:
                    if type(res) is str:
                        # Transform to list
                        res = [int(recid.strip()) for recid in \
                        res.strip("[]").split(",") if recid.strip() != ""]
                    res.reverse()
                except (ValueError, AttributeError):
                    res = []
            self.cached_queries[params + str(parse_results)] = res
            return self.cached_queries[params + str(parse_results)]
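
Two details of Example #7 are easy to miss: query results are memoised under the key params + str(parse_results), and when of='id' the remote response arrives as a string such as "[1, 2, 3]" that has to be turned back into a list of integers in reverse order. A standalone sketch of both, with illustrative query values and no network access:

import urllib  # Python 2; on Python 3 use urllib.parse.urlencode instead

cached_queries = {}

def parse_id_response(res):
    # Mirror of the of == "id" branch above: accept either a ready-made list
    # or the textual form "[1, 2, 3]" and return the ids in reverse order.
    try:
        if type(res) is str:
            res = [int(recid.strip())
                   for recid in res.strip("[]").split(",")
                   if recid.strip() != ""]
        res.reverse()
    except (ValueError, AttributeError):
        res = []
    return res

kwparams = {'p': 'ellis', 'of': 'id'}
params = urllib.urlencode(kwparams, doseq=1)
key = params + str(False)            # same cache-key construction as search()
cached_queries[key] = parse_id_response("[1, 2, 3]")
print(cached_queries[key])           # [3, 2, 1]
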
Example #8
0
def cvify_records(recids, of, req=None, so='d'):
    """
       Write a CV for the records RECIDS in the output format OF.
       REQ is the Apache/mod_python request object; SO is the sort order
       ('d' for descending).
    """
    # intbitsets don't support indexing, so we need a list from our hitset first
    recids = [hit for hit in recids]
    if so == 'd':
        recids.reverse()
    if of.startswith('h'):
        if of == 'hcv':
            format_records(recids,
                           of=of,
                           record_prefix=lambda count: '%d) ' % (count + 1),
                           req=req)
        elif of == 'htcv':
            format_records(recids,
                           of=of,
                           record_prefix=lambda count: '%d) ' % (count + 1),
                           req=req)

    elif of == 'tlcv':
        HEADER = r'''
\documentclass{article}
%%To use pdflatex, uncomment these lines, as well as the \href lines
%%in each entry
%%\usepackage[pdftex,
%%       colorlinks=true,
%%       urlcolor=blue,       %% \href{...}{...} external (URL)
%%       filecolor=green,     %% \href{...} local file
%%       linkcolor=red,       %% \ref{...} and \pageref{...}
%%       pdftitle={Papers by AUTHOR},
%%       pdfauthor={Your Name},
%%       pdfsubject={Just a test},
%%       pdfkeywords={test testing testable},
%%       pagebackref,
%%       pdfpagemode=None,
%%        bookmarksopen=true]{hyperref}
%%usepackage{arial}
%%\renewcommand{\familydefault}{\sfdefault} %% Sans serif
\renewcommand{\labelenumii}{\arabic{enumi}.\arabic{enumii}}

\pagestyle{empty}
\oddsidemargin 0.0in
\textwidth 6.5in
\topmargin -0.75in
\textheight 9.5in

\begin{document}
\title{Papers by AUTHOR}
\author{}
\date{}
\maketitle
\begin{enumerate}

%%%%   LIST OF PAPERS
%%%%   Please comment out anything between here and the
%%%%   first \item
%%%%   Please send any updates or corrections to the list to
%%%%   %(email)s
''' % {
            'email': CFG_SITE_SUPPORT_EMAIL,
        }
        FOOTER = r'''
\end{enumerate}
\end{document}
'''
        format_records(recids,
                       of=of,
                       prologue=HEADER,
                       epilogue=FOOTER,
                       req=req)

    return ''
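
The 'hcv', 'htcv' and 'tlcv' branches above differ only in what they hand to format_records(): a per-record numbering prefix for the HTML formats, and a LaTeX prologue/epilogue pair for 'tlcv'. The toy formatter below, with a purely hypothetical format_one() helper, shows how those pieces compose around the records:

def format_one(recid):
    # Stand-in for the real output-format machinery.
    return 'Record %d' % recid

def toy_format_records(recids, prologue='', epilogue='', record_prefix=None):
    parts = [prologue]
    for count, recid in enumerate(recids):
        prefix = record_prefix(count) if record_prefix else ''
        parts.append(prefix + format_one(recid) + '\n')
    parts.append(epilogue)
    return ''.join(parts)

print(toy_format_records([10, 20, 30],
                         prologue='--- CV ---\n',
                         epilogue='--- end ---\n',
                         record_prefix=lambda count: '%d) ' % (count + 1)))
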
Example #9
0
    def __call__(self, req, form):
        """RSS 2.0 feed service."""

        # Keep only interesting parameters for the search
        default_params = websearch_templates.rss_default_urlargd
        # We need to keep 'jrec' and 'rg' here in order to have
        # 'multi-page' RSS. These parameters are not kept by default
        # as we don't want to consider them when building RSS links
        # from search and browse pages.
        default_params.update({'jrec':(int, 1),
                               'rg': (int, CFG_WEBSEARCH_INSTANT_BROWSE_RSS)})
        argd = wash_urlargd(form, default_params)
        user_info = collect_user_info(req)

        for coll in argd['c'] + [argd['cc']]:
            if collection_restricted_p(coll):
                (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll)
                if auth_code and user_info['email'] == 'guest':
                    cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll})
                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                            make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
                    return redirect_to_url(req, target, norobot=True)
                elif auth_code:
                    return page_not_authorized(req, "../", \
                        text=auth_msg, \
                        navmenuid='search')

        # Create a standard filename with these parameters
        current_url = websearch_templates.build_rss_url(argd)
        cache_filename = current_url.split('/')[-1]

        # In the same way as previously, add 'jrec' & 'rg'

        req.content_type = "application/rss+xml"
        req.send_http_header()
        try:
            # Try to read from cache
            path = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename)
            # Check if cache needs refresh
            filedesc = open(path, "r")
            last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(path)).st_mtime)
            assert(datetime.datetime.now() < last_update_time + datetime.timedelta(minutes=CFG_WEBSEARCH_RSS_TTL))
            c_rss = filedesc.read()
            filedesc.close()
            req.write(c_rss)
            return
        except Exception as e:
            # do it live and cache

            previous_url = None
            if argd['jrec'] > 1:
                prev_jrec = argd['jrec'] - argd['rg']
                if prev_jrec < 1:
                    prev_jrec = 1
                previous_url = websearch_templates.build_rss_url(argd,
                                                                 jrec=prev_jrec)

            # Check whether the user has the right to set a high wildcard limit;
            # if not, replace the user-supplied limit with the default one.
            if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
                if acc_authorize_action(req, 'runbibedit')[0] != 0:
                    argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

            req.argd = argd
            recIDs = perform_request_search(req, of="id",
                                            c=argd['c'], cc=argd['cc'],
                                            p=argd['p'], f=argd['f'],
                                            p1=argd['p1'], f1=argd['f1'],
                                            m1=argd['m1'], op1=argd['op1'],
                                            p2=argd['p2'], f2=argd['f2'],
                                            m2=argd['m2'], op2=argd['op2'],
                                            p3=argd['p3'], f3=argd['f3'],
                                            m3=argd['m3'], wl=argd['wl'])
            nb_found = len(recIDs)
            next_url = None
            if len(recIDs) >= argd['jrec'] + argd['rg']:
                next_url = websearch_templates.build_rss_url(argd,
                                                             jrec=(argd['jrec'] + argd['rg']))

            first_url = websearch_templates.build_rss_url(argd, jrec=1)
            last_url = websearch_templates.build_rss_url(argd, jrec=nb_found - argd['rg'] + 1)

            recIDs = recIDs[-argd['jrec']:(-argd['rg'] - argd['jrec']):-1]

            rss_prologue = '<?xml version="1.0" encoding="UTF-8"?>\n' + \
            websearch_templates.tmpl_xml_rss_prologue(current_url=current_url,
                                                      previous_url=previous_url,
                                                      next_url=next_url,
                                                      first_url=first_url, last_url=last_url,
                                                      nb_found=nb_found,
                                                      jrec=argd['jrec'], rg=argd['rg'],
                                                      cc=argd['cc']) + '\n'
            req.write(rss_prologue)
            rss_body = format_records(recIDs,
                                      of='xr',
                                      ln=argd['ln'],
                                      user_info=user_info,
                                      record_separator="\n",
                                      req=req, epilogue="\n")
            rss_epilogue = websearch_templates.tmpl_xml_rss_epilogue() + '\n'
            req.write(rss_epilogue)

            # update cache
            dirname = "%s/rss" % (CFG_CACHEDIR)
            mymkdir(dirname)
            fullfilename = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename)
            try:
                # Remove the file just in case it already existed
                # so that a bit of space is created
                os.remove(fullfilename)
            except OSError:
                pass

            # Check if there's enough space to cache the request.
            if len(os.listdir(dirname)) < CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS:
                try:
                    os.umask(0o022)
                    f = open(fullfilename, "w")
                    f.write(rss_prologue + rss_body + rss_epilogue)
                    f.close()
                except IOError as v:
                    if v.errno == 36:  # errno.ENAMETOOLONG on Linux
                        # Cache filename (derived from the URL) was too long.
                        # Never mind, don't cache.
                        pass
                    else:
                        raise
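
The try block near the top of the RSS handler above implements a simple time-to-live check on the cached feed: the file is served only while its mtime is younger than CFG_WEBSEARCH_RSS_TTL, otherwise the feed is regenerated and the cache rewritten. A self-contained sketch of that check, with an illustrative path and TTL:

import datetime
import os

def read_fresh_cache(path, ttl_minutes):
    try:
        mtime = datetime.datetime.fromtimestamp(
            os.stat(os.path.abspath(path)).st_mtime)
        if datetime.datetime.now() >= mtime + datetime.timedelta(minutes=ttl_minutes):
            return None  # stale: caller regenerates and rewrites the file
        filedesc = open(path, 'r')
        try:
            return filedesc.read()
        finally:
            filedesc.close()
    except (OSError, IOError):
        return None      # no cache file yet: also regenerate

content = read_fresh_cache('/tmp/rss/example.xml', ttl_minutes=15)
print('cache hit' if content is not None else 'cache miss, regenerate')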