Пример #1
0
def get_files(bfo, distinguish_main_and_additional_files=True, include_subformat_icons=False):
    """
    Returns the files available for the given record.
    Returned structure is a tuple (parsed_urls, old_versions, additionals):
     - parsed_urls: contains categorized URLS (see details below)
     - old_versions: set to True if we can have access to old versions
     - additionals: set to True if we have other documents than the 'main' document

     Parameter 'include_subformat_icons' decides if subformat
     considered as icons should be returned

    'parsed_urls' is a dictionary in the form::
        {'main_urls' : {'Main'      : [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.pdf', 'aFile', 'PDF'),
                                       ('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.gif', 'aFile', 'GIF')],
                        'Additional': [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/bFile.pdf', 'bFile', 'PDF')]},

         'others_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'),     # url(8564_u), description(8564_z/y)
                         ('http://externalurl.com/bFile.pdf', 'Fulltext')],

         'cern_urls' : [('http://cern.ch/aFile.pdf', 'Fulltext'),              # url(8564_u), description(8564_z/y)
                        ('http://cern.ch/bFile.pdf', 'Fulltext')],
        }

    Some notes about returned structure:
        - key 'cern_urls' is only available on CERN site (it is created
          as an empty list; this function never appends to it)
        - keys in the 'main_urls' dictionary are the descriptions of the
          URLs (subfield 'y', the file name, or the translated 'Fulltext')
        - older versions are not part of the parsed urls
        - returns only main files when possible, that is when doctypes
          make a distinction between 'Main' files and other
          files. Otherwise returns all the files as main. This is only
          enabled if distinguish_main_and_additional_files is set to True
        - URLs whose format/subformat is listed in
          _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS are skipped
    """

    _ = gettext_set_language(bfo.lang)

    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False # We can provide link to older files. Will be
                         # set to True if older files are found.
    additionals = False  # We have additional files. Will be set to
                         # True if additional files are found.

    # Prepare object to return
    parsed_urls = {'main_urls': {},   # Urls hosted by Invenio (bibdocs)
                   'others_urls': []  # External urls
                   }
    if CFG_CERN_SITE:
        parsed_urls['cern_urls'] = [] # cern.ch urls

    # Doctypes can be of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files
    distinct_main_and_additional_files = bool(
        len(bibarchive.list_bibdocs(doctype='Main')) > 0 and
        distinguish_main_and_additional_files)

    # Parse URLs
    for complete_url in urls:
        if 'u' not in complete_url:
            continue
        url = complete_url['u']
        # urlparse() yields (scheme, netloc, path, params, query, fragment):
        # 'params' below therefore holds the query string.
        (dummy, host, path, dummy, params, dummy) = urlparse(url)
        subformat = complete_url.get('x', '')
        filename = urllib.unquote(basename(path))
        name = file_strip_ext(filename)
        url_format = filename[len(name):]
        if url_format.startswith('.'):
            url_format = url_format[1:]
        if compose_format(url_format, subformat) in _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS:
            ## This format should be hidden.
            continue

        descr = filename
        if 'y' in complete_url:
            descr = complete_url['y']
            if descr == 'Fulltext':
                descr = _("Fulltext")

        if not url.startswith(CFG_SITE_URL): # Not a bibdoc?
            if not descr: # For not bibdoc let's have a description
                # Display the URL in full:
                descr = url
            if CFG_CERN_SITE and 'cern.ch' in host and \
                   ('/setlink?' in url or \
                    'cms' in host or \
                    'documents.cern.ch' in url or \
                    'doc.cern.ch' in url or \
                    'preprints.cern.ch' in url):
                # Legacy CERN-hosted link: it is only kept when it can be
                # rewritten to a direct arXiv link, otherwise it is dropped.
                key_value_pairs = (part.split('=') for part in params.split('&'))
                url_params_dict = dict(pair for pair in key_value_pairs
                                       if len(pair) == 2)
                if 'categ' in url_params_dict and \
                       (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \
                       'id' in url_params_dict:
                    # Old arXiv links, used to be handled by
                    # setlink. Provide direct links to arXiv
                    for file_format, label in (('pdf', "PDF"),):
                        url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                              {'format': file_format,
                               'category': url_params_dict['categ'],
                               'id': url_params_dict['id']}
                        parsed_urls['others_urls'].append((url, "%s/%s %s" % \
                                                           (url_params_dict['categ'],
                                                            url_params_dict['id'],
                                                            label)))
            else:
                parsed_urls['others_urls'].append((url, descr)) # external url
            continue

        # It's a bibdoc!
        assigned = False
        for doc in bibarchive.list_bibdocs():
            if int(doc.get_latest_version()) > 1:
                old_versions = True
            if not any(f.get_full_name().startswith(filename)
                       for f in doc.list_all_files()):
                continue
            assigned = True
            if not include_subformat_icons and \
                   CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(subformat):
                # This is an icon and we want to skip it
                continue
            if doc.get_doctype(bfo.recID) != 'Main' and \
                   distinct_main_and_additional_files:
                # In that case we record that there are
                # additional files, but don't add them to
                # returned structure.
                additionals = True
                continue
            if not descr:
                descr = _('Fulltext')
            # Build the displayed format in a fresh variable: appending to
            # 'url_format' itself would repeat the subformat suffix each
            # time another bibdoc matches the same file name.
            display_format = url_format
            params_dict = parse_qs(params)
            if 'subformat' in params_dict:
                display_format += ' (%s)' % params_dict['subformat'][0]
            parsed_urls['main_urls'].setdefault(descr, []).append(
                (url, name, display_format))

        if not assigned: # Url is not a bibdoc :-S
            if not descr:
                descr = filename
            parsed_urls['others_urls'].append((url, descr)) # Let's put it in a general other url
    return (parsed_urls, old_versions, additionals)
Пример #2
0
def document_upload(req=None, folder="", matching="", mode="", exec_date="", exec_time="", ln=CFG_SITE_LANG, priority="1", email_logs_to=None):
    """ Take files from the given directory and upload them with the appropriate mode.
    @parameters:
        + req: request used to check upload rights and to read the user
               nickname; when None no rights check is done
        + folder: Folder where the files to upload are stored
        + matching: How to match file names with record fields (report number, barcode,...)
        + mode: Upload mode (append, revise, replace)
        + exec_date, exec_time: optional scheduled run time of the bibupload task
        + ln: language of the returned error messages
        + priority: priority passed to the bibupload task
        + email_logs_to: optional address passed as --email-logs-to
    @return: tuple (errors, info)
        errors: list of (file name, error description) tuples; a failed
                listing of the folder is reported as ("", exception) and a
                failed move to DONE/ as the bare string 'MoveError'
        info: [number of files read, [names of the files submitted]]
        error descriptions:
            1 - More than one possible recID, ambiguous behaviour
            2 - No records match that file name
            3 - File already exists
            4 - A file with the same name and format already exists
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs, file_strip_ext
    from invenio.utils.hash import md5
    import shutil
    from invenio.legacy.search_engine import search_pattern, \
                                      guess_collection_of_a_record
    _ = gettext_set_language(ln)
    errors = []
    info = [0, []] # Number of files read, name of the files
    try:
        files = os.listdir(folder)
    except OSError as error:
        errors.append(("", error))
        return errors, info
    err_desc = {1: _("More than one possible recID, ambiguous behaviour"), 2: _("No records match that file name"),
                3: _("File already exists"), 4: _("A file with the same name and format already exists")}
    # Create directory DONE/ if doesn't exist
    if not folder.endswith("/"):
        folder += "/"
    files_done_dir = folder + "DONE/"
    try:
        os.mkdir(files_done_dir)
    except OSError:
        # Directory exists or no write permission
        pass
    for docfile in files:
        source_path = os.path.join(folder, docfile)
        if not os.path.isfile(source_path):
            continue
        info[0] += 1
        identifier = file_strip_ext(docfile)
        extension = docfile[len(identifier):]
        rec_id = None
        if identifier:
            rec_id = search_pattern(p=identifier, f=matching, m='e')
        if not rec_id:
            errors.append((docfile, err_desc[2]))
            continue
        if len(rec_id) > 1:
            errors.append((docfile, err_desc[1]))
            continue
        rec_id = str(list(rec_id)[0])
        rec_info = BibRecDocs(rec_id)
        if rec_info.bibdocs:
            # The checksum does not depend on the bibdoc: compute it once and
            # close the file right away.
            with open(source_path, "rb") as source:
                file_md5 = md5(source.read()).hexdigest()
            # Look for a duplicate in ALL the bibdocs of the record; the first
            # one found rejects the file (one error per file).
            duplicate_error = None
            for bibdoc in rec_info.bibdocs:
                for attached_file in bibdoc.list_all_files():
                    if attached_file.checksum == file_md5:
                        duplicate_error = err_desc[3]
                        break
                    elif attached_file.get_full_name() == docfile:
                        duplicate_error = err_desc[4]
                        break
                if duplicate_error is not None:
                    break
            if duplicate_error is not None:
                errors.append((docfile, duplicate_error))
                continue
        # Check if user has rights to upload file
        if req is not None:
            file_collection = guess_collection_of_a_record(int(rec_id))
            auth_code, auth_message = acc_authorize_action(req, 'runbatchuploader', collection=file_collection)
            if auth_code != 0:
                error_msg = _("No rights to upload to collection '%(x_name)s'", x_name=file_collection)
                errors.append((docfile, error_msg))
                continue
        # Copy document to be uploaded to temporary folder
        (fd, tmp_file) = tempfile.mkstemp(prefix=identifier + "_" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_", suffix=extension, dir=CFG_TMPSHAREDDIR)
        # Only the path is needed (shutil.copy reopens it): release the
        # descriptor returned by mkstemp instead of leaking it.
        os.close(fd)
        shutil.copy(source_path, tmp_file)
        # Create MARC temporary file with FFT tag and call bibupload
        (fd, filename) = tempfile.mkstemp(prefix=identifier + '_', dir=CFG_TMPSHAREDDIR)
        filedesc = os.fdopen(fd, 'w')
        try:
            marc_content = """ <record>
                                    <controlfield tag="001">%(rec_id)s</controlfield>
                                        <datafield tag="FFT" ind1=" " ind2=" ">
                                            <subfield code="n">%(name)s</subfield>
                                            <subfield code="a">%(path)s</subfield>
                                        </datafield>
                               </record> """ % {'rec_id': rec_id,
                                                'name': encode_for_xml(identifier),
                                                'path': encode_for_xml(tmp_file),
                                                }
            filedesc.write(marc_content)
        finally:
            filedesc.close()
        info[1].append(docfile)
        # 'nickname' is what gets logged in the upload history; it stays
        # empty when there is no request to read it from.
        nickname = ""
        if req is not None:
            nickname = collect_user_info(req)['nickname']
        # Fallback owner of the task when no nickname is available
        user = nickname or "******"
        # Execute bibupload with the appropriate mode

        task_arguments = ('bibupload', user, "--" + mode,
                          "--priority=" + priority, "-N", "batchupload")

        if exec_date:
            date = "--runtime='" + exec_date + ' ' + exec_time + "'"
            task_arguments += (date, )
        if email_logs_to:
            task_arguments += ("--email-logs-to", email_logs_to)
        task_arguments += (filename, )

        jobid = task_low_level_submission(*task_arguments)

        # write batch upload history
        if exec_date != "":
            exec_datetime = exec_date + ' ' + exec_time
        else:
            exec_datetime = time.strftime("%Y-%m-%d %H:%M:%S")
        run_sql("""INSERT INTO hstBATCHUPLOAD (user, submitdate,
                filename, execdate, id_schTASK, batch_mode)
                VALUES (%s, NOW(), %s, %s, %s, "document")""",
                (nickname, docfile, exec_datetime, str(jobid)))

        # Move file to DONE folder
        done_filename = docfile + "_" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_" + str(jobid)
        try:
            os.rename(source_path, os.path.join(files_done_dir, done_filename))
        except OSError:
            # NOTE(review): unlike the other entries this is a bare string,
            # not a (file, description) tuple; kept as is because callers
            # may rely on it -- confirm before changing.
            errors.append('MoveError')
    return errors, info
Пример #3
0
        def getfile(req, form):
            """Serve one file of the record or render the list of its files.

            Uses `self.recid` and `filename` from the enclosing scope.
            After the access checks, the requested docname/format/version
            is looked up among the bibdocs of the record and streamed when
            found and accessible. In every other case an HTML page listing
            the files is returned, preceded by the warnings collected on
            the way.

            @param req: the request object
            @param form: the raw URL arguments, washed against
                bibdocfile_templates.files_default_urlargd
            @return: the streamed file, a redirection, an error page or the
                file list page
            """
            args = wash_urlargd(form, bibdocfile_templates.files_default_urlargd)
            ln = args['ln']

            _ = gettext_set_language(ln)

            uid = getUid(req)
            user_info = collect_user_info(req)

            verbose = args['verbose']
            if verbose >= 1 and not isUserSuperAdmin(user_info):
                # Only SuperUser can see all the details!
                verbose = 0

            if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
                return page_not_authorized(req, "/%s/%s" % (CFG_SITE_RECORD, self.recid),
                                           navmenuid='submit')

            if record_exists(self.recid) < 1:
                msg = "<p>%s</p>" % _("Requested record does not seem to exist.")
                return warning_page(msg, req, ln)

            if record_empty(get_record(self.recid).legacy_create_recstruct()):
                msg = "<p>%s</p>" % _("Requested record does not seem to have been integrated.")
                return warning_page(msg, req, ln)

            (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid)
            if auth_code and user_info['email'] == 'guest':
                # Guests are sent to the login page and back here afterwards
                cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
                target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                            make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                                    CFG_SITE_SECURE_URL + user_info['uri']}, {})
                return redirect_to_url(req, target, norobot=True)
            elif auth_code:
                return page_not_authorized(req, "../", \
                                            text = auth_message)

            # When set, downloads are not registered
            readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1

            # From now on: either the user provided a specific file
            # name (and a possible version), or we return a list of
            # all the available files. In no case are the docids
            # visible.
            try:
                bibarchive = BibRecDocs(self.recid)
            except InvenioBibDocFileError:
                register_exception(req=req, alert_admin=True)
                msg = "<p>%s</p><p>%s</p>" % (
                    _("The system has encountered an error in retrieving the list of files for this document."),
                    _("The error has been logged and will be taken in consideration as soon as possible."))
                return warning_page(msg, req, ln)

            if bibarchive.deleted_p():
                req.status = apache.HTTP_GONE
                return warning_page(_("Requested record does not seem to exist."), req, ln)

            docname = ''
            docformat = ''
            version = ''
            warn = ''

            if filename:
                # We know the complete file name, guess which docid it
                # refers to
                ## TODO: Change the extension system according to ext.py from setlink
                ##       and have a uniform extension mechanism...
                docname = file_strip_ext(filename)
                docformat = filename[len(docname):]
                if docformat and docformat[0] != '.':
                    docformat = '.' + docformat
                if args['subformat']:
                    docformat += ';%s' % args['subformat']
            else:
                docname = args['docname']

            if not docformat:
                docformat = args['format']
                if args['subformat']:
                    docformat += ';%s' % args['subformat']

            if not version:
                version = args['version']

            ## Download as attachment
            is_download = bool(args['download'])

            # version could be either empty, or all or an integer
            try:
                int(version)
            except ValueError:
                if version != 'all':
                    version = ''

            display_hidden = isUserSuperAdmin(user_info)

            if version != 'all':
                # search this filename in the complete list of files
                for doc in bibarchive.list_bibdocs():
                    if docname == bibarchive.get_docname(doc.id):
                        try:
                            try:
                                docfile = doc.get_file(docformat, version)
                            except InvenioBibDocFileError as msg:
                                req.status = apache.HTTP_NOT_FOUND
                                if not CFG_INSPIRE_SITE and req.headers_in.get('referer'):
                                    ## There must be a broken link somewhere.
                                    ## Maybe it's good to alert the admin
                                    register_exception(req=req, alert_admin=True)
                                warn += write_warning(_("The format %(x_form)s does not exist for the given version: %(x_vers)s",
                                            x_form=cgi.escape(docformat), x_vers=cgi.escape(str(msg))))
                                break
                            (auth_code, auth_message) = docfile.is_restricted(user_info)
                            if auth_code != 0 and not is_user_owner_of_record(user_info, self.recid):
                                if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(get_subformat_from_format(docformat)):
                                    return stream_restricted_icon(req)
                                if user_info['email'] == 'guest':
                                    cookie = mail_cookie_create_authorize_action('viewrestrdoc', {'status' : docfile.get_status()})
                                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                                    make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
                                    # Return here, like the record-level
                                    # redirection above, so that the
                                    # restricted file can never be streamed
                                    # below should redirect_to_url() return
                                    # instead of raising.
                                    return redirect_to_url(req, target)
                                else:
                                    req.status = apache.HTTP_UNAUTHORIZED
                                    warn += write_warning(_("This file is restricted: ") + str(auth_message))
                                    break

                            if not docfile.hidden_p():
                                if not readonly:
                                    ip = str(req.remote_ip)
                                    doc.register_download(ip, docfile.get_version(), docformat, uid, self.recid)
                                try:
                                    return docfile.stream(req, download=is_download)
                                except InvenioBibDocFileError:
                                    register_exception(req=req, alert_admin=True)
                                    req.status = apache.HTTP_INTERNAL_SERVER_ERROR
                                    warn += write_warning(_("An error has happened in trying to stream the request file."))
                            else:
                                req.status = apache.HTTP_UNAUTHORIZED
                                warn += write_warning(_("The requested file is hidden and can not be accessed."))

                        except InvenioBibDocFileError:
                            register_exception(req=req, alert_admin=True)

            # A specific file was requested, nothing was streamed and no
            # other problem was reported: the file does not exist.
            if docname and docformat and not warn:
                req.status = apache.HTTP_NOT_FOUND
                warn += write_warning(_("Requested file does not seem to exist."))
            filelist = bibdocfile_templates.tmpl_display_bibrecdocs(bibarchive, "", version, ln=ln, verbose=verbose, display_hidden=display_hidden)

            t = warn + bibdocfile_templates.tmpl_filelist(
                ln=ln,
                filelist=filelist)

            cc = guess_primary_collection_of_a_record(self.recid)
            cc_id = Collection.query.filter_by(name=cc).value('id')
            # The tab lookup is disabled (it was get_detailed_page_tabs(cc_id,
            # self.recid, ln)). An empty dict, not None, keeps the code below
            # working: the page is simply rendered without tabs.
            unordered_tabs = {}
            ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in iteritems(unordered_tabs)]
            ordered_tabs_id.sort(key=lambda tab: tab[1])
            link_ln = ''
            if ln != CFG_SITE_LANG:
                link_ln = '?ln=%s' % ln
            tabs = [(unordered_tabs[tab_id]['label'],
                     '%s/%s/%s/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, tab_id, link_ln),
                     tab_id == 'files',
                     unordered_tabs[tab_id]['enabled'])
                    for (tab_id, dummy_order) in ordered_tabs_id
                    if unordered_tabs[tab_id]['visible'] is True]

            # The counts lookup is disabled as well (it was
            # get_detailed_page_tabs_counts(self.recid)): read the counts
            # with a default instead of raising KeyError on the empty dict.
            # NOTE(review): -1 is assumed to mean "count not available" for
            # the template -- confirm against detailed_record_container_top.
            tabs_counts = {}
            top = webstyle_templates.detailed_record_container_top(self.recid,
                                                                   tabs,
                                                                   args['ln'],
                                                                   citationnum=tabs_counts.get('Citations', -1),
                                                                   referencenum=tabs_counts.get('References', -1),
                                                                   discussionnum=tabs_counts.get('Discussions', -1))
            bottom = webstyle_templates.detailed_record_container_bottom(self.recid,
                                                                         tabs,
                                                                         args['ln'])
            title, description, keywords = websearch_templates.tmpl_record_page_header_content(req, self.recid, args['ln'])
            return pageheaderonly(title=title,
                        navtrail=create_navtrail_links(cc=cc, aas=0, ln=ln) + \
                                        ''' &gt; <a class="navtrail" href="%s/%s/%s">%s</a>
                                        &gt; %s''' % \
                        (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, title, _("Access to Fulltext")),

                        description=description,
                        keywords=keywords,
                        uid=uid,
                        language=ln,
                        req=req,
                        navmenuid='search',
                        navtrail_append_title_p=0) + \
                        websearch_templates.tmpl_search_pagestart(ln) + \
                        top + t + bottom + \
                        websearch_templates.tmpl_search_pageend(ln) + \
                        pagefooteronly(language=ln, req=req)
Пример #4
0
def document_upload(req=None,
                    folder="",
                    matching="",
                    mode="",
                    exec_date="",
                    exec_time="",
                    ln=CFG_SITE_LANG,
                    priority="1",
                    email_logs_to=None):
    """ Take files from the given directory and upload them with the appropriate mode.
    @parameters:
        + req: request used to check upload rights and to read the user
               nickname; when None no rights check is done
        + folder: Folder where the files to upload are stored
        + matching: How to match file names with record fields (report number, barcode,...)
        + mode: Upload mode (append, revise, replace)
        + exec_date, exec_time: optional scheduled run time of the bibupload task
        + ln: language of the returned error messages
        + priority: priority passed to the bibupload task
        + email_logs_to: optional address passed as --email-logs-to
    @return: tuple (errors, info)
        errors: list of (file name, error description) tuples; a failed
                listing of the folder is reported as ("", exception) and a
                failed move to DONE/ as the bare string 'MoveError'
        info: [number of files read, [names of the files submitted]]
        error descriptions:
            1 - More than one possible recID, ambiguous behaviour
            2 - No records match that file name
            3 - File already exists
            4 - A file with the same name and format already exists
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs, file_strip_ext
    from invenio.utils.hash import md5
    import shutil
    from invenio.legacy.search_engine import search_pattern, \
                                      guess_collection_of_a_record
    _ = gettext_set_language(ln)
    errors = []
    info = [0, []]  # Number of files read, name of the files
    try:
        files = os.listdir(folder)
    except OSError as error:
        errors.append(("", error))
        return errors, info
    err_desc = {
        1: _("More than one possible recID, ambiguous behaviour"),
        2: _("No records match that file name"),
        3: _("File already exists"),
        4: _("A file with the same name and format already exists")
    }
    # Create directory DONE/ if doesn't exist
    if not folder.endswith("/"):
        folder += "/"
    files_done_dir = folder + "DONE/"
    try:
        os.mkdir(files_done_dir)
    except OSError:
        # Directory exists or no write permission
        pass
    for docfile in files:
        source_path = os.path.join(folder, docfile)
        if not os.path.isfile(source_path):
            continue
        info[0] += 1
        identifier = file_strip_ext(docfile)
        extension = docfile[len(identifier):]
        rec_id = None
        if identifier:
            rec_id = search_pattern(p=identifier, f=matching, m='e')
        if not rec_id:
            errors.append((docfile, err_desc[2]))
            continue
        if len(rec_id) > 1:
            errors.append((docfile, err_desc[1]))
            continue
        rec_id = str(list(rec_id)[0])
        rec_info = BibRecDocs(rec_id)
        if rec_info.bibdocs:
            # The checksum does not depend on the bibdoc: compute it once and
            # close the file right away.
            with open(source_path, "rb") as source:
                file_md5 = md5(source.read()).hexdigest()
            # Look for a duplicate in ALL the bibdocs of the record; the first
            # one found rejects the file (one error per file).
            duplicate_error = None
            for bibdoc in rec_info.bibdocs:
                for attached_file in bibdoc.list_all_files():
                    if attached_file.checksum == file_md5:
                        duplicate_error = err_desc[3]
                        break
                    elif attached_file.get_full_name() == docfile:
                        duplicate_error = err_desc[4]
                        break
                if duplicate_error is not None:
                    break
            if duplicate_error is not None:
                errors.append((docfile, duplicate_error))
                continue
        # Check if user has rights to upload file
        if req is not None:
            file_collection = guess_collection_of_a_record(int(rec_id))
            auth_code, auth_message = acc_authorize_action(
                req, 'runbatchuploader', collection=file_collection)
            if auth_code != 0:
                error_msg = _(
                    "No rights to upload to collection '%(x_name)s'",
                    x_name=file_collection)
                errors.append((docfile, error_msg))
                continue
        # Copy document to be uploaded to temporary folder
        (fd, tmp_file) = tempfile.mkstemp(
            prefix=identifier + "_" +
            time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_",
            suffix=extension,
            dir=CFG_TMPSHAREDDIR)
        # Only the path is needed (shutil.copy reopens it): release the
        # descriptor returned by mkstemp instead of leaking it.
        os.close(fd)
        shutil.copy(source_path, tmp_file)
        # Create MARC temporary file with FFT tag and call bibupload
        (fd, filename) = tempfile.mkstemp(prefix=identifier + '_',
                                          dir=CFG_TMPSHAREDDIR)
        filedesc = os.fdopen(fd, 'w')
        try:
            marc_content = """ <record>
                                    <controlfield tag="001">%(rec_id)s</controlfield>
                                        <datafield tag="FFT" ind1=" " ind2=" ">
                                            <subfield code="n">%(name)s</subfield>
                                            <subfield code="a">%(path)s</subfield>
                                        </datafield>
                               </record> """ % {
                'rec_id': rec_id,
                'name': encode_for_xml(identifier),
                'path': encode_for_xml(tmp_file),
            }
            filedesc.write(marc_content)
        finally:
            filedesc.close()
        info[1].append(docfile)
        # 'nickname' is what gets logged in the upload history; it stays
        # empty when there is no request to read it from.
        nickname = ""
        if req is not None:
            nickname = collect_user_info(req)['nickname']
        # Fallback owner of the task when no nickname is available
        user = nickname or "******"
        # Execute bibupload with the appropriate mode

        task_arguments = ('bibupload', user, "--" + mode,
                          "--priority=" + priority, "-N", "batchupload")

        if exec_date:
            date = "--runtime='" + exec_date + ' ' + exec_time + "'"
            task_arguments += (date, )
        if email_logs_to:
            task_arguments += ("--email-logs-to", email_logs_to)
        task_arguments += (filename, )

        jobid = task_low_level_submission(*task_arguments)

        # write batch upload history
        if exec_date != "":
            exec_datetime = exec_date + ' ' + exec_time
        else:
            exec_datetime = time.strftime("%Y-%m-%d %H:%M:%S")
        run_sql(
            """INSERT INTO hstBATCHUPLOAD (user, submitdate,
                    filename, execdate, id_schTASK, batch_mode)
                    VALUES (%s, NOW(), %s, %s, %s, "document")""",
            (nickname, docfile, exec_datetime, str(jobid)))

        # Move file to DONE folder
        done_filename = docfile + "_" + time.strftime(
            "%Y%m%d%H%M%S", time.localtime()) + "_" + str(jobid)
        try:
            os.rename(source_path,
                      os.path.join(files_done_dir, done_filename))
        except OSError:
            # NOTE(review): unlike the other entries this is a bare string,
            # not a (file, description) tuple; kept as is because callers
            # may rely on it -- confirm before changing.
            errors.append('MoveError')
    return errors, info
Пример #5
0
        def getfile(req, form):
            """
            Handle a request for a file attached to record ``self.recid``.

            ``self`` and ``filename`` are not defined in this function; they
            are presumably supplied by the enclosing scope (TODO confirm).

            The handler proceeds as follows:
              1. wash the URL arguments in ``form``;
              2. refuse the request (``page_not_authorized``) when the user
                 has no uid or the site access level is above 1;
              3. show a warning page when the record does not exist or is
                 empty;
              4. check that the user may view the record; guests are sent to
                 the login page and other users get ``page_not_authorized``,
                 unless the record belongs to a released webjournal issue;
              5. unless ``version`` is ``'all'``, look for the bibdoc whose
                 docname matches and stream the requested format/version.

            @param req: the request object; its ``status`` attribute is set
                        on the various failure paths.
            @param form: the raw URL arguments, washed with
                         ``bibdocfile_templates.files_default_urlargd``.
            @return: the result of the page/stream helper that ended the
                     request; otherwise None with ``req.status`` set to
                     ``apache.HTTP_NOT_FOUND``.
            """
            args = wash_urlargd(form,
                                bibdocfile_templates.files_default_urlargd)
            ln = args['ln']

            _ = gettext_set_language(ln)

            uid = getUid(req)
            user_info = collect_user_info(req)

            verbose = args['verbose']
            if verbose >= 1 and not isUserSuperAdmin(user_info):
                # Only SuperUser can see all the details!
                verbose = 0

            # No uid, or a site access level above 1: refuse the request.
            if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
                return page_not_authorized(req,
                                           "/%s/%s" %
                                           (CFG_SITE_RECORD, self.recid),
                                           navmenuid='submit')

            if record_exists(self.recid) < 1:
                msg = "<p>%s</p>" % _(
                    "Requested record does not seem to exist.")
                return warning_page(msg, req, ln)

            if record_empty(self.recid):
                msg = "<p>%s</p>" % _(
                    "Requested record does not seem to have been integrated.")
                return warning_page(msg, req, ln)

            # Record-level authorization. A non-zero auth_code means the
            # user may not view the record; records that are part of a
            # released webjournal issue are served regardless.
            (auth_code,
             auth_message) = check_user_can_view_record(user_info, self.recid)
            if auth_code and user_info['email'] == 'guest':
                if webjournal_utils.is_recid_in_released_issue(self.recid):
                    # We can serve the file
                    pass
                else:
                    # Guests get a chance to log in and come back here.
                    cookie = mail_cookie_create_authorize_action(
                        VIEWRESTRCOLL, {
                            'collection':
                            guess_primary_collection_of_a_record(self.recid)
                        })
                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                             make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                                     CFG_SITE_SECURE_URL + user_info['uri']}, {})
                    return redirect_to_url(req, target, norobot=True)
            elif auth_code:
                if webjournal_utils.is_recid_in_released_issue(self.recid):
                    # We can serve the file
                    pass
                else:
                    return page_not_authorized(req, "../", \
                                               text = auth_message)

            # When the site access level is exactly 1, downloads are served
            # but not registered (see register_download below).
            readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1

            # From now on: either the user provided a specific file
            # name (and a possible version), or we return a list of
            # all the available files. In no case are the docids
            # visible.
            try:
                bibarchive = BibRecDocs(self.recid)
            except InvenioBibDocFileError:
                register_exception(req=req, alert_admin=True)
                msg = "<p>%s</p><p>%s</p>" % (
                    _("The system has encountered an error in retrieving the list of files for this document."
                      ),
                    _("The error has been logged and will be taken in consideration as soon as possible."
                      ))
                return warning_page(msg, req, ln)

            if bibarchive.deleted_p():
                req.status = apache.HTTP_GONE
                return warning_page(
                    _("Requested record does not seem to exist."), req, ln)

            docname = ''
            docformat = ''
            version = ''
            warn = ''

            if filename:
                # We know the complete file name, guess which docid it
                # refers to
                ## TODO: Change the extension system according to ext.py from setlink
                ##       and have a uniform extension mechanism...
                docname = file_strip_ext(filename)
                # Whatever file_strip_ext removed is taken as the format.
                docformat = filename[len(docname):]
                if docformat and docformat[0] != '.':
                    docformat = '.' + docformat
                if args['subformat']:
                    docformat += ';%s' % args['subformat']
            else:
                docname = args['docname']

            # No format derived from the file name: fall back to the URL
            # arguments.
            if not docformat:
                docformat = args['format']
                if args['subformat']:
                    docformat += ';%s' % args['subformat']

            # version is still '' at this point, so this always reads the
            # URL argument.
            if not version:
                version = args['version']

            ## Download as attachment
            is_download = False
            if args['download']:
                is_download = True

            # version could be either empty, or all or an integer
            try:
                int(version)
            except ValueError:
                if version != 'all':
                    version = ''

            display_hidden = isUserSuperAdmin(user_info)

            if version != 'all':
                # search this filename in the complete list of files
                for doc in bibarchive.list_bibdocs():
                    if docname == bibarchive.get_docname(doc.id):
                        try:
                            try:
                                docfile = doc.get_file(docformat, version)
                            except InvenioBibDocFileError as msg:
                                req.status = apache.HTTP_NOT_FOUND
                                if not CFG_INSPIRE_SITE and req.headers_in.get(
                                        'referer'):
                                    ## There must be a broken link somewhere.
                                    ## Maybe it's good to alert the admin
                                    register_exception(req=req,
                                                       alert_admin=True)
                                warn += write_warning(
                                    _("The format %(x_form)s does not exist for the given version: %(x_vers)s",
                                      x_form=cgi.escape(docformat),
                                      x_vers=cgi.escape(str(msg))))
                                break
                            # File-level restriction; owners of the record
                            # bypass it.
                            (auth_code,
                             auth_message) = docfile.is_restricted(user_info)
                            if auth_code != 0 and not is_user_owner_of_record(
                                    user_info, self.recid):
                                # Requests for an icon subformat get a
                                # placeholder icon instead of an error.
                                if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(
                                        get_subformat_from_format(docformat)):
                                    return stream_restricted_icon(req)
                                if user_info['email'] == 'guest':
                                    cookie = mail_cookie_create_authorize_action(
                                        'viewrestrdoc',
                                        {'status': docfile.get_status()})
                                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                                    make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
                                    # NOTE(review): unlike the record-level
                                    # redirect above, the result is not
                                    # returned. Presumably redirect_to_url
                                    # ends the request by raising; if it
                                    # does not, control falls through to the
                                    # streaming code below -- confirm.
                                    redirect_to_url(req, target)
                                else:
                                    req.status = apache.HTTP_UNAUTHORIZED
                                    warn += write_warning(
                                        _("This file is restricted: ") +
                                        str(auth_message))
                                    break

                            if not docfile.hidden_p():
                                if not readonly:
                                    ip = str(req.remote_ip)
                                    doc.register_download(
                                        ip, docfile.get_version(), docformat,
                                        uid, self.recid)
                                try:
                                    return docfile.stream(req,
                                                          download=is_download)
                                except InvenioBibDocFileError as msg:
                                    register_exception(req=req,
                                                       alert_admin=True)
                                    req.status = apache.HTTP_INTERNAL_SERVER_ERROR
                                    warn += write_warning(
                                        _("An error has happened in trying to stream the request file."
                                          ))
                            else:
                                req.status = apache.HTTP_UNAUTHORIZED
                                warn += write_warning(
                                    _("The requested file is hidden and can not be accessed."
                                      ))

                        except InvenioBibDocFileError as msg:
                            register_exception(req=req, alert_admin=True)

            # Prevent leaking of restricted file names
            # NOTE(review): this return is unconditional, so every path that
            # reaches it answers 404 (overwriting any status set above) and
            # the `warn` messages collected so far are never rendered. All
            # the code below, which builds the file-list page, is
            # unreachable.
            req.status = apache.HTTP_NOT_FOUND
            return

            if docname and docformat and not warn:
                req.status = apache.HTTP_NOT_FOUND
                warn += write_warning(
                    _("Requested file does not seem to exist."))


#            filelist = bibarchive.display("", version, ln=ln, verbose=verbose, display_hidden=display_hidden)
            filelist = bibdocfile_templates.tmpl_display_bibrecdocs(
                bibarchive,
                "",
                version,
                ln=ln,
                verbose=verbose,
                display_hidden=display_hidden)

            t = warn + bibdocfile_templates.tmpl_filelist(ln=ln,
                                                          filelist=filelist)

            cc = guess_primary_collection_of_a_record(self.recid)
            unordered_tabs = get_detailed_page_tabs(get_colID(cc), self.recid,
                                                    ln)
            ordered_tabs_id = [(tab_id, values['order'])
                               for (tab_id,
                                    values) in iteritems(unordered_tabs)]
            # Order the tabs by their 'order' value (Python 2 cmp-style sort).
            ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1]))
            link_ln = ''
            if ln != CFG_SITE_LANG:
                link_ln = '?ln=%s' % ln
            # One (label, url, is-the-files-tab, enabled) tuple per visible tab.
            tabs = [
                (unordered_tabs[tab_id]['label'], '%s/%s/%s/%s%s' %
                 (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, tab_id, link_ln),
                 tab_id == 'files', unordered_tabs[tab_id]['enabled'])
                for (tab_id, dummy_order) in ordered_tabs_id
                if unordered_tabs[tab_id]['visible'] is True
            ]

            tabs_counts = get_detailed_page_tabs_counts(self.recid)
            top = webstyle_templates.detailed_record_container_top(
                self.recid,
                tabs,
                args['ln'],
                citationnum=tabs_counts['Citations'],
                referencenum=tabs_counts['References'],
                discussionnum=tabs_counts['Discussions'])
            bottom = webstyle_templates.detailed_record_container_bottom(
                self.recid, tabs, args['ln'])
            title, description, keywords = websearch_templates.tmpl_record_page_header_content(
                req, self.recid, args['ln'])
            return pageheaderonly(title=title,
                        navtrail=create_navtrail_links(cc=cc, aas=0, ln=ln) + \
                                        ''' &gt; <a class="navtrail" href="%s/%s/%s">%s</a>
                                        &gt; %s''' % \
                        (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, title, _("Access to Fulltext")),

                        description=description,
                        keywords=keywords,
                        uid=uid,
                        language=ln,
                        req=req,
                        navmenuid='search',
                        navtrail_append_title_p=0) + \
                        websearch_templates.tmpl_search_pagestart(ln) + \
                        top + t + bottom + \
                        websearch_templates.tmpl_search_pageend(ln) + \
                        pagefooteronly(language=ln, req=req)
Пример #6
0
def get_files(bfo,
              distinguish_main_and_additional_files=True,
              include_subformat_icons=False):
    """
    Returns the files available for the given record.
    Returned structure is a tuple (parsed_urls, old_versions, additionals):
     - parsed_urls: contains categorized URLS (see details below)
     - old_versions: set to True if we can have access to old versions
     - additionals: set to True if we have other documents than the 'main' document

     Parameter 'include_subformat_icons' decides if subformat
     considered as icons should be returned

    'parsed_urls' is a dictionary in the form::
        {'main_urls' : {'Main'      : [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.pdf', 'aFile', 'PDF'),
                                       ('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.gif', 'aFile', 'GIF')],
                        'Additional': [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/bFile.pdf', 'bFile', 'PDF')]},

         'others_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'),     # url(8564_u), description(8564_z/y)
                         ('http://externalurl.com/bFile.pdf', 'Fulltext')],

         'cern_urls' : [],
        }

    Some notes about returned structure:
        - the key for external URLs is spelled 'others_urls'
        - key 'cern_urls' is only available on CERN site; this function
          creates it as an empty list and never appends to it
        - keys in the 'main_urls' dictionary are the URL descriptions
          (subfield 'y', defaulting to the translated "Fulltext")
        - the third item of a 'main_urls' tuple is the file extension
          without the leading dot, followed by ' (<subformat>)' when the
          URL carries a 'subformat' query argument
        - older versions are not part of the parsed urls
        - returns only main files when possible, that is when doctypes
          make a distinction between 'Main' files and other
          files. Otherwise returns all the files as main. This is only
          enabled if distinguish_main_and_additional_files is set to True
    """
    CFG_SITE_URL = current_app.config['CFG_SITE_URL']
    CFG_CERN_SITE = current_app.config['CFG_CERN_SITE']
    CFG_BIBFORMAT_HIDDEN_FILE_FORMATS = current_app.config[
        'CFG_BIBFORMAT_HIDDEN_FILE_FORMATS']
    _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS = set(
        normalize_format(fmt) for fmt in CFG_BIBFORMAT_HIDDEN_FILE_FORMATS)

    _ = gettext_set_language(bfo.lang)

    urls = bfo.fields("8564_")
    bibarchive = BibRecDocs(bfo.recID)

    old_versions = False  # We can provide link to older files. Will be
    # set to True if older files are found.
    additionals = False  # We have additional files. Will be set to
    # True if additional files are found.

    # Prepare object to return
    parsed_urls = {
        'main_urls': {},  # Urls hosted by Invenio (bibdocs)
        'others_urls': []  # External urls
    }
    if CFG_CERN_SITE:
        parsed_urls['cern_urls'] = []  # cern.ch urls

    # Doctypes can be of any type, but when there is one file marked as
    # 'Main', we consider that there is a distinction between "main"
    # and "additional" files. Otherwise they will all be considered
    # equally as main files
    distinct_main_and_additional_files = False
    if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \
            distinguish_main_and_additional_files:
        distinct_main_and_additional_files = True

    # Parse URLs
    for complete_url in urls:
        if 'u' not in complete_url:
            # Nothing to link to.
            continue

        url = complete_url['u']
        # The fifth element of the urlparse() result is the query string;
        # it is called 'params' throughout this function.
        (dummy, host, path, dummy, params, dummy) = urlparse(url)
        subformat = complete_url.get('x', '')
        filename = urllib.unquote(basename(path))
        name = file_strip_ext(filename)
        url_format = filename[len(name):]
        if url_format.startswith('.'):
            url_format = url_format[1:]
        if compose_format(url_format, subformat) in \
                _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS:
            ## This format should be hidden.
            continue

        descr = _("Fulltext")
        if 'y' in complete_url:
            descr = complete_url['y']
            if descr == 'Fulltext':
                descr = _("Fulltext")

        if not url.startswith(CFG_SITE_URL):  # Not a bibdoc?
            if not descr:  # For not bibdoc let's have a description
                # Display the URL in full:
                descr = url
            if CFG_CERN_SITE and 'cern.ch' in host and \
                    ('/setlink?' in url or
                     'cms' in host or
                     'documents.cern.ch' in url or
                     'doc.cern.ch' in url or
                     'preprints.cern.ch' in url):
                # NOTE(review): legacy CERN links that do not satisfy the
                # arXiv test below are not listed anywhere in the result.
                pairs = [part.split('=') for part in params.split('&')]
                url_params_dict = dict(
                    pair for pair in pairs if len(pair) == 2)
                if 'categ' in url_params_dict and \
                        (url_params_dict['categ'].split('.', 1)[0]
                         in cern_arxiv_categories) and \
                        'id' in url_params_dict:
                    # Old arXiv links, used to be handled by
                    # setlink. Provide direct links to arXiv.
                    # Only the PDF link is generated; the other formats
                    # were disabled:
                    #   ('ps', "PS"),
                    #   ('e-print', "Source (generally TeX or LaTeX)"),
                    #   ('abs', "Abstract")
                    for file_format, label in [('pdf', "PDF")]:
                        arxiv_url = \
                            "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \
                            {'format': file_format,
                             'category': url_params_dict['categ'],
                             'id': url_params_dict['id']}
                        parsed_urls['others_urls'].append(
                            (arxiv_url, "%s/%s %s" %
                             (url_params_dict['categ'],
                              url_params_dict['id'],
                              label)))
            else:
                parsed_urls['others_urls'].append(
                    (url, descr))  # external url
        else:  # It's a bibdoc!
            assigned = False
            for doc in bibarchive.list_bibdocs():
                if int(doc.get_latest_version()) > 1:
                    old_versions = True
                if not any(f.get_full_name().startswith(filename)
                           for f in doc.list_all_files()):
                    # This URL does not point to a file of this bibdoc.
                    continue
                assigned = True
                if not include_subformat_icons and \
                        CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(subformat):
                    # This is an icon and we want to skip it
                    continue
                if doc.get_doctype(bfo.recID) != 'Main' and \
                        distinct_main_and_additional_files:
                    # In that case we record that there are
                    # additional files, but don't add them to
                    # returned structure.
                    additionals = True
                else:
                    if not descr:
                        descr = _('Fulltext')
                    if descr not in parsed_urls['main_urls']:
                        parsed_urls['main_urls'][descr] = []
                    # Build the displayed format in a fresh local so the
                    # subformat suffix is not appended again when another
                    # bibdoc matches the same URL.
                    display_format = url_format
                    params_dict = parse_qs(params)
                    if 'subformat' in params_dict:
                        display_format += ' (%s)' % params_dict[
                            'subformat'][0]
                    parsed_urls['main_urls'][descr].append(
                        (url, name, display_format))
            if not assigned:  # Url is not a bibdoc :-S
                if not descr:
                    descr = filename
                parsed_urls['others_urls'].append(
                    (url, descr))  # Let's put it in a general other url
    return (parsed_urls, old_versions, additionals)