Exemplo n.º 1
0
     3: _("File already exists"),
     4: _("A file with the same name and format already exists"),
     5: _("No rights to upload to collection '%s'")
 }
 # Create directory DONE/ if doesn't exist
 folder = (folder[-1] == "/") and folder or (folder + "/")
 files_done_dir = folder + "DONE/"
 try:
     os.mkdir(files_done_dir)
 except OSError:
     # Directory exists or no write permission
     pass
 for docfile in files:
     if os.path.isfile(os.path.join(folder, docfile)):
         info[0] += 1
         identifier = file_strip_ext(docfile)
         extension = docfile[len(identifier):]
         rec_id = None
         if identifier:
             if matching == 'smart':
                 for comment, pattern, fields in FILE_SEARCH_PATTERN:
                     g = pattern.match(docfile)
                     if g:
                         query = ' '.join(
                             map(
                                 lambda key, value: '%s:"%s"' %
                                 (key, value), fields, g.groups()))
                         write_message("%s smart matched via %s: %s" %
                                       (docfile, comment, query))
                         rec_id = search_pattern(p=query)
                         break
Exemplo n.º 2
0
    def _lookup(self, component, path):
        # after /<CFG_SITE_RECORD>/<recid>/files/ every part is used as the file
        # name
        filename = component

        def getfile(req, form):
            args = wash_urlargd(form, websubmit_templates.files_default_urlargd)
            ln = args["ln"]

            _ = gettext_set_language(ln)

            uid = getUid(req)
            user_info = collect_user_info(req)

            verbose = args["verbose"]
            if verbose >= 1 and not isUserSuperAdmin(user_info):
                # Only SuperUser can see all the details!
                verbose = 0

            if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
                return page_not_authorized(req, "/%s/%s" % (CFG_SITE_RECORD, self.recid), navmenuid="submit")

            if record_exists(self.recid) < 1:
                msg = "<p>%s</p>" % _("Requested record does not seem to exist.")
                return warningMsg(msg, req, CFG_SITE_NAME, ln)

            if record_empty(self.recid):
                msg = "<p>%s</p>" % _("Requested record does not seem to have been integrated.")
                return warningMsg(msg, req, CFG_SITE_NAME, ln)

            (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid)
            if auth_code and user_info["email"] == "guest":
                cookie = mail_cookie_create_authorize_action(
                    VIEWRESTRCOLL, {"collection": guess_primary_collection_of_a_record(self.recid)}
                )
                target = "/youraccount/login" + make_canonical_urlargd(
                    {"action": cookie, "ln": ln, "referer": CFG_SITE_URL + user_info["uri"]}, {}
                )
                return redirect_to_url(req, target, norobot=True)
            elif auth_code:
                return page_not_authorized(req, "../", text=auth_message)

            readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1

            # From now on: either the user provided a specific file
            # name (and a possible version), or we return a list of
            # all the available files. In no case are the docids
            # visible.
            try:
                bibarchive = BibRecDocs(self.recid)
            except InvenioWebSubmitFileError, e:
                register_exception(req=req, alert_admin=True)
                msg = "<p>%s</p><p>%s</p>" % (
                    _("The system has encountered an error in retrieving the list of files for this document."),
                    _("The error has been logged and will be taken in consideration as soon as possible."),
                )
                return warningMsg(msg, req, CFG_SITE_NAME, ln)

            if bibarchive.deleted_p():
                return print_warning(req, _("Requested record does not seem to exist."))

            docname = ""
            format = ""
            version = ""
            warn = ""

            if filename:
                # We know the complete file name, guess which docid it
                # refers to
                ## TODO: Change the extension system according to ext.py from setlink
                ##       and have a uniform extension mechanism...
                docname = file_strip_ext(filename)
                format = filename[len(docname) :]
                if format and format[0] != ".":
                    format = "." + format
                if args["subformat"]:
                    format += ";%s" % args["subformat"]
            else:
                docname = args["docname"]

            if not format:
                format = args["format"]
                if args["subformat"]:
                    format += ";%s" % args["subformat"]

            if not version:
                version = args["version"]

            # version could be either empty, or all or an integer
            try:
                int(version)
            except ValueError:
                if version != "all":
                    version = ""

            display_hidden = isUserSuperAdmin(user_info)

            if version != "all":
                # search this filename in the complete list of files
                for doc in bibarchive.list_bibdocs():
                    if docname == doc.get_docname():
                        try:
                            docfile = doc.get_file(format, version)
                            (auth_code, auth_message) = docfile.is_restricted(user_info)
                            if auth_code != 0 and not is_user_owner_of_record(user_info, self.recid):
                                if CFG_WEBSUBMIT_ICON_SUBFORMAT_RE.match(get_subformat_from_format(format)):
                                    return stream_restricted_icon(req)
                                if user_info["email"] == "guest":
                                    cookie = mail_cookie_create_authorize_action(
                                        "viewrestrdoc", {"status": docfile.get_status()}
                                    )
                                    target = "/youraccount/login" + make_canonical_urlargd(
                                        {"action": cookie, "ln": ln, "referer": CFG_SITE_URL + user_info["uri"]}, {}
                                    )
                                    redirect_to_url(req, target)
                                else:
                                    req.status = apache.HTTP_UNAUTHORIZED
                                    warn += print_warning(_("This file is restricted: ") + auth_message)
                                    break

                            if not docfile.hidden_p():
                                if not readonly:
                                    ip = str(req.remote_ip)
                                    res = doc.register_download(ip, version, format, uid)
                                try:
                                    return docfile.stream(req)
                                except InvenioWebSubmitFileError, msg:
                                    register_exception(req=req, alert_admin=True)
                                    req.status = apache.HTTP_INTERNAL_SERVER_ERROR
                                    return warningMsg(
                                        _("An error has happened in trying to stream the request file."),
                                        req,
                                        CFG_SITE_NAME,
                                        ln,
                                    )
                            else:
                                req.status = apache.HTTP_UNAUTHORIZED
                                warn = print_warning(_("The requested file is hidden and can not be accessed."))

                        except InvenioWebSubmitFileError, msg:
                            register_exception(req=req, alert_admin=True)
Exemplo n.º 3
0
        def getfile(req, form):
            args = wash_urlargd(form,
                                bibdocfile_templates.files_default_urlargd)
            ln = args['ln']

            _ = gettext_set_language(ln)

            uid = getUid(req)
            user_info = collect_user_info(req)

            verbose = args['verbose']
            if verbose >= 1 and not isUserSuperAdmin(user_info):
                # Only SuperUser can see all the details!
                verbose = 0

            if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
                return page_not_authorized(req,
                                           "/%s/%s" %
                                           (CFG_SITE_RECORD, self.recid),
                                           navmenuid='submit')

            if record_exists(self.recid) < 1:
                msg = "<p>%s</p>" % _(
                    "Requested record does not seem to exist.")
                return warning_page(msg, req, ln)

            if record_empty(self.recid):
                msg = "<p>%s</p>" % _(
                    "Requested record does not seem to have been integrated.")
                return warning_page(msg, req, ln)

            (auth_code,
             auth_message) = check_user_can_view_record(user_info, self.recid)
            if auth_code and user_info['email'] == 'guest':
                if webjournal_utils.is_recid_in_released_issue(self.recid):
                    # We can serve the file
                    pass
                else:
                    cookie = mail_cookie_create_authorize_action(
                        VIEWRESTRCOLL, {
                            'collection':
                            guess_primary_collection_of_a_record(self.recid)
                        })
                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                             make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                                     CFG_SITE_SECURE_URL + user_info['uri']}, {})
                    return redirect_to_url(req, target, norobot=True)
            elif auth_code:
                if webjournal_utils.is_recid_in_released_issue(self.recid):
                    # We can serve the file
                    pass
                else:
                    return page_not_authorized(req, "../", \
                                               text = auth_message)

            readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1

            # From now on: either the user provided a specific file
            # name (and a possible version), or we return a list of
            # all the available files. In no case are the docids
            # visible.
            try:
                bibarchive = BibRecDocs(self.recid)
            except InvenioBibDocFileError:
                register_exception(req=req, alert_admin=True)
                msg = "<p>%s</p><p>%s</p>" % (
                    _("The system has encountered an error in retrieving the list of files for this document."
                      ),
                    _("The error has been logged and will be taken in consideration as soon as possible."
                      ))
                return warning_page(msg, req, ln)

            if bibarchive.deleted_p():
                req.status = apache.HTTP_GONE
                return warning_page(
                    _("Requested record does not seem to exist."), req, ln)

            docname = ''
            docformat = ''
            version = ''
            warn = ''

            if filename:
                # We know the complete file name, guess which docid it
                # refers to
                ## TODO: Change the extension system according to ext.py from setlink
                ##       and have a uniform extension mechanism...
                docname = file_strip_ext(filename)
                docformat = filename[len(docname):]
                if docformat and docformat[0] != '.':
                    docformat = '.' + docformat
                if args['subformat']:
                    docformat += ';%s' % args['subformat']
            else:
                docname = args['docname']

            if not docformat:
                docformat = args['format']
                if args['subformat']:
                    docformat += ';%s' % args['subformat']

            if not version:
                version = args['version']

            ## Download as attachment
            is_download = False
            if args['download']:
                is_download = True

            # version could be either empty, or all or an integer
            try:
                int(version)
            except ValueError:
                if version != 'all':
                    version = ''

            display_hidden = isUserSuperAdmin(user_info)

            if version != 'all':
                # search this filename in the complete list of files
                for doc in bibarchive.list_bibdocs():
                    if docname == bibarchive.get_docname(doc.id):
                        try:
                            try:
                                docfile = doc.get_file(docformat, version)
                            except InvenioBibDocFileError, msg:
                                req.status = apache.HTTP_NOT_FOUND
                                if not CFG_INSPIRE_SITE and req.headers_in.get(
                                        'referer'):
                                    ## There must be a broken link somewhere.
                                    ## Maybe it's good to alert the admin
                                    register_exception(req=req,
                                                       alert_admin=True)
                                warn += write_warning(
                                    _("The format %s does not exist for the given version: %s"
                                      ) % (cgi.escape(docformat),
                                           cgi.escape(str(msg))))
                                break
                            (auth_code,
                             auth_message) = docfile.is_restricted(user_info)
                            if auth_code != 0 and not is_user_owner_of_record(
                                    user_info, self.recid):
                                if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(
                                        get_subformat_from_format(docformat)):
                                    return stream_restricted_icon(req)
                                if user_info['email'] == 'guest':
                                    cookie = mail_cookie_create_authorize_action(
                                        'viewrestrdoc',
                                        {'status': docfile.get_status()})
                                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                                    make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
                                    redirect_to_url(req, target)
                                else:
                                    req.status = apache.HTTP_UNAUTHORIZED
                                    warn += write_warning(
                                        _("This file is restricted: ") +
                                        str(auth_message))
                                    break

                            if not docfile.hidden_p():
                                if not readonly:
                                    ip = str(req.remote_ip)
                                    doc.register_download(
                                        ip, docfile.get_version(), docformat,
                                        uid, self.recid)
                                try:
                                    return docfile.stream(req,
                                                          download=is_download)
                                except InvenioBibDocFileError, msg:
                                    register_exception(req=req,
                                                       alert_admin=True)
                                    req.status = apache.HTTP_INTERNAL_SERVER_ERROR
                                    warn += write_warning(
                                        _("An error has happened in trying to stream the request file."
                                          ))
                            else:
                                req.status = apache.HTTP_UNAUTHORIZED
                                warn += write_warning(
                                    _("The requested file is hidden and can not be accessed."
                                      ))

                        except InvenioBibDocFileError, msg:
                            register_exception(req=req, alert_admin=True)
Exemplo n.º 4
0
def get_files(bfo, distinguish_main_and_additional_files=True):
    """
    Returns the files available for the given record.
    Returned structure is a tuple (parsed_urls, old_versions, additionals):
     - parsed_urls: contains categorized URLS (see details below)
     - old_versions: set to True if we can have access to old versions
     - additionals: set to True if we have other documents than the 'main' document

    'parsed_urls' is a dictionary in the form:
    {'main_urls' : {'Main'      : [('http://CFG_SITE_URL/record/1/files/aFile.pdf', 'aFile', 'PDF'),
                                   ('http://CFG_SITE_URL/record/1/files/aFile.gif', 'aFile', 'GIF')],
                    'Additional': [('http://CFG_SITE_URL/record/1/files/bFile.pdf', 'bFile', 'PDF')]},

     'other_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'),      # url(8564_u), description(8564_z/y)
                    ('http://externalurl.com/bFile.pdf', 'Fulltext')],

     'cern_urls' : [('http://cern.ch/aFile.pdf', 'Fulltext'),              # url(8564_u), description(8564_z/y)
                    ('http://cern.ch/bFile.pdf', 'Fulltext')],
    }

    Some notes about returned structure:
    - key 'cern_urls' is only available on CERN site
    - keys in main_url dictionaries are defined by the BibDoc.
    - older versions are not part of the parsed urls
    - returns only main files when possible, that is when doctypes
      make a distinction between 'Main' files and other
      files. Otherwise returns all the files as main. This is only
      enabled if distinguish_main_and_additional_files is set to True
    """
    _ = gettext_set_language(bfo.lang)

    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False # We can provide link to older files. Will be
                         # set to True if older files are found.
    additionals = False  # We have additional files. Will be set to
                         # True if additional files are found.

    # Prepare object to return
    parsed_urls = {'main_urls':{},    # Urls hosted by Invenio (bibdocs)
                  'others_urls':[]    # External urls
                  }
    if CFG_CERN_SITE:
        parsed_urls['cern_urls'] = [] # cern.ch urls

    # Doctypes can of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files
    distinct_main_and_additional_files = False
    if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \
           distinguish_main_and_additional_files:
        distinct_main_and_additional_files = True

    # Parse URLs
    for complete_url in urls:
        if complete_url.has_key('u'):
            url = complete_url['u']
            (dummy, host, path, dummy, params, dummy) = urlparse(url)
            filename = urllib.unquote(basename(path))
            name = file_strip_ext(filename)
            format = filename[len(name):]
            if format.startswith('.'):
                format = format[1:]

            descr = ''
            if complete_url.has_key('y'):
                descr = complete_url['y']
            if not url.startswith(CFG_SITE_URL): # Not a bibdoc?
                if not descr: # For not bibdoc let's have a description
                    # Display the URL in full:
                    descr = url
                if CFG_CERN_SITE and 'cern.ch' in host and \
                       ('/setlink?' in url or \
                        'cms' in host or \
                        'documents.cern.ch' in url or \
                        'doc.cern.ch' in url or \
                        'preprints.cern.ch' in url):
                    url_params_dict = dict([part.split('=') for part in params.split('&') \
                                            if len(part) == 2])
                    if url_params_dict.has_key('categ') and \
                           (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                           url_params_dict.has_key('id'):
                        # Old arXiv links, used to be handled by
                        # setlink. Provide direct links to arXiv
                        for file_format, label in [('pdf', "PDF")]:#,
                            #('ps', "PS"),
                            #('e-print', "Source (generally TeX or LaTeX)"),
                            #('abs', "Abstract")]:
                            url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                                  {'format': file_format,
                                   'category': url_params_dict['categ'],
                                   'id': url_params_dict['id']}
                            parsed_urls['others_urls'].append((url, "%s/%s %s" % \
                                                               (url_params_dict['categ'],
                                                                url_params_dict['id'],
                                                                label)))
                else:
                    parsed_urls['others_urls'].append((url, descr)) # external url
            else: # It's a bibdoc!
                assigned = False
                for doc in bibarchive.list_bibdocs():
                    if int(doc.get_latest_version()) > 1:
                        old_versions = True
                    if True in [f.fullname.startswith(filename) \
                                for f in doc.list_all_files()]:
                        assigned = True
                        #doc.getIcon()
                        if not doc.doctype == 'Main' and \
                               distinct_main_and_additional_files == True:
                            # In that case we record that there are
                            # additional files, but don't add them to
                            # returned structure.
                            additionals = True
                        else:
                            if not descr:
                                descr = _('Fulltext')
                            if not parsed_urls['main_urls'].has_key(descr):
                                parsed_urls['main_urls'][descr] = []
                            parsed_urls['main_urls'][descr].append((url, name, format))
                if not assigned: # Url is not a bibdoc :-S
                    if not descr:
                        descr = filename
                    parsed_urls['others_urls'].append((url, descr)) # Let's put it in a general other url

    return (parsed_urls, old_versions, additionals)
     return errors, info
 err_desc = {1: _("More than one possible recID, ambiguous behaviour"), 2: _("No records match that file name"),
             3: _("File already exists"), 4: _("A file with the same name and format already exists"),
             5: _("No rights to upload to collection '%s'")}
 # Create directory DONE/ if doesn't exist
 folder = (folder[-1] == "/") and folder or (folder + "/")
 files_done_dir = folder + "DONE/"
 try:
     os.mkdir(files_done_dir)
 except OSError:
     # Directory exists or no write permission
     pass
 for docfile in files:
     if os.path.isfile(os.path.join(folder, docfile)):
         info[0] += 1
         identifier = file_strip_ext(docfile)
         extension = docfile[len(identifier):]
         rec_id = None
         if identifier:
             rec_id = search_pattern(p=identifier, f=matching, m='e')
         if not rec_id:
             errors.append((docfile, err_desc[2]))
             continue
         elif len(rec_id) > 1:
             errors.append((docfile, err_desc[1]))
             continue
         else:
             rec_id = str(list(rec_id)[0])
         rec_info = BibRecDocs(rec_id)
         if rec_info.bibdocs:
             for bibdoc in rec_info.bibdocs:
Exemplo n.º 6
0
        def getfile(req, form):
            args = wash_urlargd(form, bibdocfile_templates.files_default_urlargd)
            ln = args['ln']

            _ = gettext_set_language(ln)

            uid = getUid(req)
            user_info = collect_user_info(req)

            verbose = args['verbose']
            if verbose >= 1 and not isUserSuperAdmin(user_info):
                # Only SuperUser can see all the details!
                verbose = 0

            if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
                return page_not_authorized(req, "/%s/%s" % (CFG_SITE_RECORD, self.recid),
                                           navmenuid='submit')

            if record_exists(self.recid) < 1:
                msg = "<p>%s</p>" % _("Requested record does not seem to exist.")
                return warning_page(msg, req, ln)

            if record_empty(self.recid):
                msg = "<p>%s</p>" % _("Requested record does not seem to have been integrated.")
                return warning_page(msg, req, ln)

            (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid)
            if auth_code and user_info['email'] == 'guest':
                if webjournal_utils.is_recid_in_released_issue(self.recid):
                    # We can serve the file
                    pass
                else:
                    cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                             make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                                     CFG_SITE_SECURE_URL + user_info['uri']}, {})
                    return redirect_to_url(req, target, norobot=True)
            elif auth_code:
                if webjournal_utils.is_recid_in_released_issue(self.recid):
                    # We can serve the file
                    pass
                else:
                    return page_not_authorized(req, "../", \
                                               text = auth_message)


            readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1

            # From now on: either the user provided a specific file
            # name (and a possible version), or we return a list of
            # all the available files. In no case are the docids
            # visible.
            try:
                bibarchive = BibRecDocs(self.recid)
            except InvenioBibDocFileError:
                register_exception(req=req, alert_admin=True)
                msg = "<p>%s</p><p>%s</p>" % (
                    _("The system has encountered an error in retrieving the list of files for this document."),
                    _("The error has been logged and will be taken in consideration as soon as possible."))
                return warning_page(msg, req, ln)

            if bibarchive.deleted_p():
                req.status = apache.HTTP_GONE
                return warning_page(_("Requested record does not seem to exist."), req, ln)

            docname = ''
            docformat = ''
            version = ''
            warn = ''

            if filename:
                # We know the complete file name, guess which docid it
                # refers to
                ## TODO: Change the extension system according to ext.py from setlink
                ##       and have a uniform extension mechanism...
                docname = file_strip_ext(filename)
                docformat = filename[len(docname):]
                if docformat and docformat[0] != '.':
                    docformat = '.' + docformat
                if args['subformat']:
                    docformat += ';%s' % args['subformat']
            else:
                docname = args['docname']

            if not docformat:
                docformat = args['format']
                if args['subformat']:
                    docformat += ';%s' % args['subformat']

            if not version:
                version = args['version']

            ## Download as attachment
            is_download = False
            if args['download']:
                is_download = True

            # version could be either empty, or all or an integer
            try:
                int(version)
            except ValueError:
                if version != 'all':
                    version = ''

            display_hidden = isUserSuperAdmin(user_info)

            if version != 'all':
                # search this filename in the complete list of files
                for doc in bibarchive.list_bibdocs():
                    if docname == bibarchive.get_docname(doc.id):
                        try:
                            try:
                                docfile = doc.get_file(docformat, version)
                            except InvenioBibDocFileError, msg:
                                req.status = apache.HTTP_NOT_FOUND
                                if req.headers_in.get('referer'):
                                    ## There must be a broken link somewhere.
                                    ## Maybe it's good to alert the admin
                                    register_exception(req=req, alert_admin=True)
                                warn += write_warning(_("The format %s does not exist for the given version: %s") % (cgi.escape(docformat), cgi.escape(str(msg))))
                                break
                            (auth_code, auth_message) = docfile.is_restricted(user_info)
                            if auth_code != 0 and not is_user_owner_of_record(user_info, self.recid):
                                if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(get_subformat_from_format(docformat)):
                                    return stream_restricted_icon(req)
                                if user_info['email'] == 'guest':
                                    cookie = mail_cookie_create_authorize_action('viewrestrdoc', {'status' : docfile.get_status()})
                                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                                    make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
                                    redirect_to_url(req, target)
                                else:
                                    req.status = apache.HTTP_UNAUTHORIZED
                                    warn += write_warning(_("This file is restricted: ") + str(auth_message))
                                    break

                            if not docfile.hidden_p():
                                if not readonly:
                                    ip = str(req.remote_ip)
                                    doc.register_download(ip, version, docformat, uid)
                                try:
                                    return docfile.stream(req, download=is_download)
                                except InvenioBibDocFileError, msg:
                                    register_exception(req=req, alert_admin=True)
                                    req.status = apache.HTTP_INTERNAL_SERVER_ERROR
                                    warn += write_warning(_("An error has happened in trying to stream the request file."))
                            else:
                                req.status = apache.HTTP_UNAUTHORIZED
                                warn += write_warning(_("The requested file is hidden and can not be accessed."))

                        except InvenioBibDocFileError, msg:
                            register_exception(req=req, alert_admin=True)
Exemplo n.º 7
0
def get_files(bfo, distinguish_main_and_additional_files=True):
    """
    Returns the files available for the given record.
    Returned structure is a tuple (parsed_urls, old_versions, additionals):
     - parsed_urls: contains categorized URLS (see details below)
     - old_versions: set to True if we can have access to old versions
     - additionals: set to True if we have other documents than the 'main' document

    'parsed_urls' is a dictionary in the form:
    {'main_urls' : {'Main'      : [('http://CFG_SITE_URL/record/1/files/aFile.pdf', 'aFile', 'PDF'),
                                   ('http://CFG_SITE_URL/record/1/files/aFile.gif', 'aFile', 'GIF')],
                    'Additional': [('http://CFG_SITE_URL/record/1/files/bFile.pdf', 'bFile', 'PDF')]},

     'other_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'),      # url(8564_u), description(8564_z/y)
                    ('http://externalurl.com/bFile.pdf', 'Fulltext')],

     'cern_urls' : [('http://cern.ch/aFile.pdf', 'Fulltext'),              # url(8564_u), description(8564_z/y)
                    ('http://cern.ch/bFile.pdf', 'Fulltext')],
    }

    Some notes about returned structure:
    - key 'cern_urls' is only available on CERN site
    - keys in main_url dictionaries are defined by the BibDoc.
    - older versions are not part of the parsed urls
    - returns only main files when possible, that is when doctypes
      make a distinction between 'Main' files and other
      files. Otherwise returns all the files as main. This is only
      enabled if distinguish_main_and_additional_files is set to True
    """
    _ = gettext_set_language(bfo.lang)

    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False  # We can provide link to older files. Will be
    # set to True if older files are found.
    additionals = False  # We have additional files. Will be set to
    # True if additional files are found.

    # Prepare object to return
    parsed_urls = {
        'main_urls': {},  # Urls hosted by Invenio (bibdocs)
        'others_urls': []  # External urls
    }
    if CFG_CERN_SITE:
        parsed_urls['cern_urls'] = []  # cern.ch urls

    # Doctypes can of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files
    distinct_main_and_additional_files = False
    if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \
           distinguish_main_and_additional_files:
        distinct_main_and_additional_files = True

    # Parse URLs
    for complete_url in urls:
        if complete_url.has_key('u'):
            url = complete_url['u']
            (dummy, host, path, dummy, params, dummy) = urlparse(url)
            filename = urllib.unquote(basename(path))
            name = file_strip_ext(filename)
            format = filename[len(name):]
            if format.startswith('.'):
                format = format[1:]

            descr = ''
            # VS change y to 3 and add z and q also if exists
            if complete_url.has_key('3'):
                if complete_url.has_key('z') and complete_url.has_key('q'):
                    descr = complete_url['3'] + ' [' + complete_url[
                        'q'] + ']' + ' [' + complete_url['z'] + ']'
                elif complete_url.has_key('z'):
                    descr = complete_url['3'] + ' [' + complete_url['z'] + ']'
                elif complete_url.has_key('q'):
                    descr = complete_url['3'] + ' [' + complete_url['q'] + ']'
                else:
                    descr = complete_url['3']
            if not url.startswith(CFG_SITE_URL):  # Not a bibdoc?
                if not descr:  # For not bibdoc let's have a description
                    # Display the URL in full:
                    descr = url
                if CFG_CERN_SITE and 'cern.ch' in host and \
                       ('/setlink?' in url or \
                        'cms' in host or \
                        'documents.cern.ch' in url or \
                        'doc.cern.ch' in url or \
                        'preprints.cern.ch' in url):
                    url_params_dict = dict([part.split('=') for part in params.split('&') \
                                            if len(part) == 2])
                    if url_params_dict.has_key('categ') and \
                           (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                           url_params_dict.has_key('id'):
                        # Old arXiv links, used to be handled by
                        # setlink. Provide direct links to arXiv
                        for file_format, label in [('pdf', "PDF")]:  #,
                            #('ps', "PS"),
                            #('e-print', "Source (generally TeX or LaTeX)"),
                            #('abs', "Abstract")]:
                            url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                                  {'format': file_format,
                                   'category': url_params_dict['categ'],
                                   'id': url_params_dict['id']}
                            parsed_urls['others_urls'].append((url, "%s/%s %s" % \
                                                               (url_params_dict['categ'],
                                                                url_params_dict['id'],
                                                                label)))
                else:
                    parsed_urls['others_urls'].append(
                        (url, descr))  # external url
            else:  # It's a bibdoc!
                assigned = False
                for doc in bibarchive.list_bibdocs():
                    if int(doc.get_latest_version()) > 1:
                        old_versions = True
                    if True in [f.fullname.startswith(filename) \
                                for f in doc.list_all_files()]:
                        assigned = True
                        #doc.getIcon()
                        if not doc.doctype == 'Main' and \
                               distinct_main_and_additional_files == True:
                            # In that case we record that there are
                            # additional files, but don't add them to
                            # returned structure.
                            additionals = True
                        else:
                            if not descr:
                                descr = _('Fulltext')
                            if not parsed_urls['main_urls'].has_key(descr):
                                parsed_urls['main_urls'][descr] = []
                            parsed_urls['main_urls'][descr].append(
                                (url, name, format))
                if not assigned:  # Url is not a bibdoc :-S
                    if not descr:
                        descr = filename
                    parsed_urls['others_urls'].append(
                        (url, descr))  # Let's put it in a general other url

    return (parsed_urls, old_versions, additionals)
Exemplo n.º 8
0
def get_files(bfo,
              distinguish_main_and_additional_files=True,
              include_subformat_icons=False,
              hide_doctypes=None):
    """
    Returns the files available for the given record.
    Returned structure is a tuple (parsed_urls, old_versions, additionals):
     - parsed_urls: contains categorized URLS (see details below)
     - old_versions: set to True if we can have access to old versions
     - additionals: set to True if we have other documents than the 'main' document

     Parameter 'include_subformat_icons' decides if subformat
     considered as icons should be returned

     Parameter hide_doctypes (list) decides which doctypes should not
     be included in the returned structure

    'parsed_urls' is a dictionary in the form::
        {'main_urls' : {'Main'      : [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.pdf', 'aFile', 'PDF'),
                                       ('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.gif', 'aFile', 'GIF')],
                        'Additional': [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/bFile.pdf', 'bFile', 'PDF')]},

         'other_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'),      # url(8564_u), description(8564_z/y)
                        ('http://externalurl.com/bFile.pdf', 'Fulltext')],

         'cern_urls' : [('http://cern.ch/aFile.pdf', 'Fulltext'),              # url(8564_u), description(8564_z/y)
                        ('http://cern.ch/bFile.pdf', 'Fulltext')],
        }

    Some notes about returned structure:
        - key 'cern_urls' is only available on CERN site
        - keys in main_url dictionaries are defined by the BibDoc.
        - older versions are not part of the parsed urls
        - returns only main files when possible, that is when doctypes
          make a distinction between 'Main' files and other
          files. Otherwise returns all the files as main. This is only
          enabled if distinguish_main_and_additional_files is set to True
    """

    _ = gettext_set_language(bfo.lang)

    if hide_doctypes is None:
        hide_doctypes = []

    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False  # We can provide link to older files. Will be
    # set to True if older files are found.
    additionals = False  # We have additional files. Will be set to
    # True if additional files are found.

    # Prepare object to return
    parsed_urls = {
        'main_urls': {},  # Urls hosted by Invenio (bibdocs)
        'others_urls': []  # External urls
    }
    if CFG_CERN_SITE:
        parsed_urls['cern_urls'] = []  # cern.ch urls

        if [
                url for url in urls
                if url.get('u', '').startswith('http://arxiv.org/pdf/')
        ]:
            # We have a link to arXiv PDF. We can hide the files on
            # CDS in some cases:
            hide_doctypes.append('CMSPUB_SOURCEF')
            hide_doctypes.append('ATLPUB_SOURCEF')
            hide_doctypes.append('LHCBPB_SOURCEF')

    # Doctypes can of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files
    distinct_main_and_additional_files = False
    if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \
           distinguish_main_and_additional_files:
        distinct_main_and_additional_files = True
    # Parse URLs
    for complete_url in urls:
        if complete_url.has_key('u'):
            url = complete_url['u']
            (dummy, host, path, dummy, params, dummy) = urlparse(url)
            subformat = complete_url.get('x', '')
            filename = urllib.unquote(basename(path))
            name = file_strip_ext(filename)
            url_format = filename[len(name):]
            if url_format.startswith('.'):
                url_format = url_format[1:]
            if compose_format(
                    url_format, subformat
            ) in _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS:
                ## This format should be hidden.
                continue

            descr = _("Fulltext")
            if complete_url.has_key('y'):
                descr = complete_url['y']
                if descr == 'Fulltext':
                    descr = _("Fulltext")
            if not url.startswith(CFG_SITE_URL):  # Not a bibdoc?
                if not descr:  # For not bibdoc let's have a description
                    # Display the URL in full:
                    descr = url
                if CFG_CERN_SITE and 'cern.ch' in host and \
                       ('/setlink?' in url or \
                        'cms' in host or \
                        'documents.cern.ch' in url or \
                        'doc.cern.ch' in url or \
                        'preprints.cern.ch' in url):
                    url_params_dict = dict([
                        part.split('=') for part in params.split('&')
                        if len(part.split('=')) == 2
                    ])
                    if url_params_dict.has_key('categ') and \
                           (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                           url_params_dict.has_key('id'):
                        # Old arXiv links, used to be handled by
                        # setlink. Provide direct links to arXiv
                        for file_format, label in [('pdf', "PDF")]:  #,
                            #('ps', "PS"),
                            #('e-print', "Source (generally TeX or LaTeX)"),
                            #('abs', "Abstract")]:
                            url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                                  {'format': file_format,
                                   'category': url_params_dict['categ'],
                                   'id': url_params_dict['id']}
                            parsed_urls['others_urls'].append((url, "%s/%s %s" % \
                                                               (url_params_dict['categ'],
                                                                url_params_dict['id'],
                                                                label)))
                else:
                    parsed_urls['others_urls'].append(
                        (url, descr))  # external url
            else:  # It's a bibdoc!
                assigned = False
                for doc in bibarchive.list_bibdocs():
                    if int(doc.get_latest_version()) > 1:
                        old_versions = True
                    if True in [f.get_full_name().startswith(filename) \
                                    for f in doc.list_all_files()]:
                        assigned = True
                        if not include_subformat_icons and \
                               CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(subformat):
                            # This is an icon and we want to skip it
                            continue
                        doctype = doc.get_doctype(bfo.recID)
                        if doctype in hide_doctypes:
                            continue
                        if not doctype == 'Main' and \
                               distinct_main_and_additional_files == True:
                            # In that case we record that there are
                            # additional files, but don't add them to
                            # returned structure.
                            additionals = True
                        else:
                            if not descr:
                                descr = _('Fulltext')
                            if not parsed_urls['main_urls'].has_key(descr):
                                parsed_urls['main_urls'][descr] = []
                            params_dict = parse_qs(params)
                            if 'subformat' in params_dict:
                                url_format += ' (%s)' % params_dict[
                                    'subformat'][0]
                            parsed_urls['main_urls'][descr].append(
                                (url, name, url_format))
                if not assigned:  # Url is not a bibdoc :-S
                    if not descr:
                        descr = filename
                    parsed_urls['others_urls'].append(
                        (url, descr))  # Let's put it in a general other url
    return (parsed_urls, old_versions, additionals)
def get_files(bfo):
    """
    Returns the files available for the given record.
    Returned structure is a tuple (parsed_urls, old_versions, additionals):

    """
    _ = gettext_set_language(bfo.lang)
    try:
        bibarchive = BibRecDocs(bfo.recID)
    except ValueError:
        # sometimes recID is no an integer...
        # so don't print anything if this is the case
        return ([], [], [])

    main_documents = []
    additional_documents = []
    external_urls = []

    user_info = bfo.user_info

    # before verifing access, assert that the user has a remote_ip and it is not
    # an internal call
    remote_ip = user_info.get('remote_ip', '')
    is_thesis = bfo.field("980__a") == 'THESIS' and bfo.field(
        "973__a") == 'EPFL'
    is_published = bfo.field("973__s") == 'PUBLISHED'

    # Parse URLs
    urls = bfo.fields("8564_")
    for complete_url in urls:
        if not complete_url.has_key('u'):
            continue

        #remove icons
        if complete_url.has_key('x') and complete_url['x'].lower() == 'icon':
            continue

        url = complete_url['u']
        (dummy, host, path, dummy, params, dummy) = urlparse(url)
        filename = urllib.unquote(basename(path))
        name = file_strip_ext(filename)
        format = filename[len(name):]
        if format.startswith('.'):
            format = format[1:]

        if not url.startswith(CFG_SITE_URL) and not complete_url.get(
                'i'):  # Not a bibdoc?
            descr = complete_url.get('z', 'URL')
            external_urls.append((url, descr, format, 0))

        else:  # It's a bibdoc!
            if complete_url.get('i') == 'EXTERNAL':
                filename = complete_url.get('z') or basename(complete_url['u'])
                if is_thesis and complete_url.get('x') == 'RESTRICTED':
                    if not complete_url.get('z'):
                        filename = _("Fulltext")
                    if not remote_ip:
                        # no real access
                        main_documents.append(
                            (thesis_link(bfo), filename,
                             basename(complete_url['u']).split('.')[-1], 0))
                        continue

                    if acc_authorize_action(bfo.user_info,
                                            'viewrestrdoc',
                                            status='RESTRICTED')[0] == 1:
                        # no real access
                        main_documents.append(
                            (thesis_link(bfo), filename,
                             basename(complete_url['u']).split('.')[-1], 0))
                        continue

                is_sar = 'SAR' in bfo.fields('909C0p')
                if is_sar:
                    main_documents.append(
                        (url, _("Get the whole digitalized project"), '', 0))
                    continue

                main_documents.append(
                    (complete_url['u'], filename,
                     basename(complete_url['u']).split('.')[-1], 0))
            else:
                # Internal
                for doc in bibarchive.list_bibdocs():
                    size = doc.get_total_size_latest_version()
                    descr = doc.get_description(format)

                    if True in [
                            f.fullname.startswith(filename)
                            for f in doc.list_all_files()
                    ]:
                        if doc.status and doc.status.lower() == 'icon':
                            continue
                        restriction = doc.list_latest_files()[0].status

                        #no ip = no access, only show the public files
                        if not remote_ip:
                            if restriction not in ('LAB', 'RESTRICTED',
                                                   'PRIVATE', 'DELETED'):
                                if doc.get_type().lower() == 'main':
                                    if not descr or descr.lower() == 'n/a':
                                        descr = name
                                        if is_thesis:
                                            descr = _("Fulltext")
                                    if not url in [
                                            m_url
                                            for (m_url, m_descr, m_format,
                                                 m_size) in main_documents
                                    ]:
                                        main_documents.append(
                                            (url, descr, format, size))
                                else:
                                    if not descr or descr.lower() == 'n/a':
                                        descr = name
                                    if not url in [
                                            m_url
                                            for (m_url, junk, junk,
                                                 junk) in additional_documents
                                    ]:
                                        additional_documents.append(
                                            (url, descr, format, size))
                            continue

                        #try:
                        if doc.list_latest_files()[0].is_restricted(
                                bfo.user_info)[0] == 1:
                            continue
                        #except:
                        #   restricted = 0

                        if doc.get_type().lower() == 'main':
                            if not descr or descr.lower() == 'n/a':
                                descr = name
                            if is_thesis:
                                if restriction == 'RESTRICTED':
                                    descr = _("EPFL intranet: Fulltext")
                                else:
                                    descr = "Texte intégral / Full text"
                            if not url in [
                                    m_url for (m_url, m_descr, m_format,
                                               m_size) in main_documents
                            ]:
                                main_documents.append(
                                    (url, descr, format, size))

                        else:
                            if not descr or descr.lower() == 'n/a':
                                descr = name
                            if is_thesis and restriction == 'RESTRICTED':
                                descr = _("EPFL intranet: %s") % descr
                            if not url in [
                                    m_url for (m_url, junk, junk,
                                               junk) in additional_documents
                            ]:
                                additional_documents.append(
                                    (url, descr, format, size))
    if is_thesis and not main_documents and is_published:
        main_documents.append(
            (thesis_link(bfo), _("Order free pdf"), 'pdf', 0))
    return (main_documents, additional_documents, external_urls)