Пример #1
0
    def formatter(bwo, **kwargs):
        """Return a formatted version of the data.

        :param bwo: object whose ``get_data()`` result is formatted.
        :param kwargs: optional settings:

            - ``formatter``: callable applied to the data; when supplied (and
              truthy) its return value is returned directly.
            - ``format``: output format code passed to ``format_record``;
              the value ``'xm'`` pretty-prints the data as XML instead.
        :return: ``''`` when there is no data; otherwise the converted data,
                 or the data itself when no conversion applies or the
                 conversion fails (a ``set`` is returned as a ``list``).
        """
        from invenio.modules.formatter.engine import format_record

        data = bwo.get_data()
        if not data:
            return ''
        custom_formatter = kwargs.get("formatter", None)
        output_format = kwargs.get("format", None)
        if custom_formatter:
            # A separate formatter is supplied
            return custom_formatter(data)
        from invenio.modules.records.api import Record
        if isinstance(data, collections.Mapping):
            # Dicts are fine on their own, but maybe it is SmartJson (record)
            dumps = getattr(data, "dumps", None)
            if dumps is None:
                # A plain mapping has no ``dumps()``; calling it would raise
                # an AttributeError that is not handled below, so hand the
                # mapping back untouched (maybe a submission?).
                return data
            try:
                data = Record(dumps()).legacy_export_as_marc()
            except (TypeError, KeyError):
                # Maybe not a record after all, submission?
                return data

        if isinstance(data, string_types):
            # It is a string type, let us try to convert it
            if output_format:
                # If already XML, format_record does not like it.
                if output_format != 'xm':
                    try:
                        return format_record(recID=None,
                                             of=output_format,
                                             xml_record=data)
                    except TypeError:
                        # Wrong kind of type: fall back to the raw string
                        pass
                else:
                    # So, XML then
                    from xml.dom.minidom import parseString

                    try:
                        pretty_data = parseString(data)
                        return pretty_data.toprettyxml()
                    except TypeError:
                        # Probably not a proper XML string then
                        return "Data cannot be parsed: %s" % (data, )
                    except Exception:
                        # Some other parsing error: deliberately best-effort,
                        # fall back to the raw string
                        pass

            # Just return the raw string
            return data
        if isinstance(data, set):
            return list(data)
        # Not any of the above types: return as-is
        return data
    def formatter(bwo, **kwargs):
        """Return a formatted version of the data.

        The data comes from ``bwo.get_data()``.  Recognised keyword
        arguments:

        :param formatter: optional callable; when given, the data is passed
                          to it and its result is returned as-is.
        :param format: optional output format code.  ``'xm'`` pretty-prints
                       string data as XML, any other value is forwarded to
                       ``format_record``.
        :return: an empty string for empty data, the converted value when a
                 conversion succeeds, and otherwise the data unchanged (sets
                 are turned into lists).
        """
        from invenio.modules.formatter.engine import format_record

        data = bwo.get_data()
        if not data:
            return ''

        supplied_formatter = kwargs.get("formatter", None)
        of = kwargs.get("format", None)
        if supplied_formatter:
            # The caller brought its own formatter, use it exclusively.
            return supplied_formatter(data)

        from invenio.modules.records.api import Record
        if isinstance(data, collections.Mapping):
            # Only record-like mappings expose ``dumps()``.  A plain dict
            # would raise AttributeError here, which the handler below does
            # not cover, so such mappings are returned unchanged.
            if not hasattr(data, "dumps"):
                return data
            try:
                data = Record(data.dumps()).legacy_export_as_marc()
            except (TypeError, KeyError):
                # Could not be turned into a record, maybe a submission?
                return data

        if isinstance(data, string_types):
            if of and of != 'xm':
                # format_record does not like input that is already XML,
                # hence the separate 'xm' branch below.
                try:
                    return format_record(recID=None,
                                         of=of,
                                         xml_record=data)
                except TypeError:
                    # Wrong kind of type, return the raw string instead.
                    pass
            elif of:
                from xml.dom.minidom import parseString

                try:
                    return parseString(data).toprettyxml()
                except TypeError:
                    # Probably not a proper XML string.
                    return "Data cannot be parsed: %s" % (data,)
                except Exception:
                    # Any other parsing error: best effort, return the raw
                    # string instead.
                    pass

            return data

        if isinstance(data, set):
            return list(data)
        # None of the handled types, return untouched.
        return data
Пример #3
0
    def formatter(bwo, **kwargs):
        """Return formatted data of object.

        The package of the latest SIP of the deposition built from ``bwo``
        is returned unchanged when the ``format`` keyword is ``"xm"``.  For
        any other format (``"hd"`` when none is given) the package is run
        through ``format_record``.
        """
        from invenio.modules.formatter.engine import format_record

        marcxml = Deposition(bwo).get_latest_sip().package
        output_format = kwargs.get("format", "hd")

        if output_format == "xm":
            # The package is handed back as-is for the 'xm' format.
            return marcxml
        return format_record(recID=None,
                             of=output_format,
                             xml_record=marcxml)
Пример #4
0
    def formatter(bwo, **kwargs):
        """Return formatted data of object.

        :param bwo: object wrapped in a ``Deposition`` to reach its latest
                    SIP.
        :param kwargs: ``format`` selects the output format (default
                       ``"hd"``); ``"xm"`` returns the SIP package as-is.
        :return: the SIP package itself for ``"xm"``, otherwise the result
                 of ``format_record`` applied to that package.
        """
        from invenio.modules.formatter.engine import format_record

        latest_sip = Deposition(bwo).get_latest_sip()
        package = latest_sip.package
        requested = kwargs.get("format", "hd")

        return package if requested == "xm" else format_record(
            recID=None,
            of=requested,
            xml_record=package
        )
Пример #5
0
    def extract_references(self, req, form):
        """References extraction page.

        This page can be used by authors to test their PDFs against our
        references extraction process.  The first non-empty form field
        among ``pdf``, ``arxiv``, ``url`` and ``txt`` is used as input; when
        nothing usable was submitted the upload form is shown instead.
        """
        user_info = collect_user_info(req)

        # Pick the extraction source: pdf, arxiv, url or txt (in that order)
        references_xml = None
        if 'pdf' in form and form['pdf']:
            references_xml = extract_from_pdf_string(form['pdf'])
        elif 'arxiv' in form and form['arxiv']:
            arxiv_url = make_arxiv_url(arxiv_id=form['arxiv'])
            references_xml = extract_references_from_url_xml(arxiv_url)
        elif 'url' in form and form['url']:
            references_xml = extract_references_from_url_xml(form['url'])
        elif 'txt' in form and form['txt']:
            references_xml = extract_references_from_string_xml(form['txt'])

        if references_xml:
            # Results available: emit the inline style, then the formatted
            # references.
            hide_link_css = """
            <style type="text/css">
                #referenceinp_link { display: none; }
            </style>
            """
            body = hide_link_css + format_record(
                0,
                'hdref',
                xml_record=references_xml.encode('utf-8'),
                user_info=user_info)
        else:
            # Nothing uploaded yet (or nothing extracted): show the form
            body = self.extract_references_template()

        # Render the page (including header, footer)
        return page(title='References Extractor',
                    body=body,
                    uid=user_info['uid'],
                    req=req)
Пример #6
0
    def extract(self, req, form):
        """References extraction page.

        This page can be used by authors to test their PDFs against our
        references extraction process.  Input is taken from the first
        non-empty form field among ``pdf``, ``arxiv``, ``url`` and ``txt``.
        """
        user_info = collect_user_info(req)

        # Pick the extraction source: pdf, arxiv, url or txt (in that order)
        references_xml = None
        if 'pdf' in form and form['pdf']:
            references_xml = extract_from_pdf_string(form['pdf'])
        elif 'arxiv' in form and form['arxiv']:
            arxiv_url = make_arxiv_url(arxiv_id=form['arxiv'])
            references_xml = extract_references_from_url_xml(arxiv_url)
        elif 'url' in form and form['url']:
            references_xml = extract_references_from_url_xml(form['url'])
        elif 'txt' in form and form['txt'].value:
            # The text field value is decoded as UTF-8, ignoring bad bytes
            text = form['txt'].value.decode('utf-8', 'ignore')
            references_xml = extract_references_from_string_xml(text)

        if references_xml:
            references_html = format_record(0,
                                            'hdref',
                                            xml_record=references_xml,
                                            user_info=user_info)
            body = docextract_templates.tmpl_web_result(references_html)
        else:
            # Nothing uploaded yet (or nothing extracted): show the form
            body = docextract_templates.tmpl_web_form()

        # Render the page (including header, footer)
        return page(title='References Extractor',
                    body=body,
                    uid=user_info['uid'],
                    req=req)
Пример #7
0
    def extract(self, req, form):
        """Serve the references extraction test page.

        :param req: request object, used to collect the user information and
                    to render the page.
        :param form: submitted form; the fields ``pdf``, ``arxiv``, ``url``
                     and ``txt`` are inspected in that order and the first
                     non-empty one is used.
        :return: the rendered page, showing either the upload form or the
                 extracted references.
        """
        user_info = collect_user_info(req)

        # Four possible inputs; the first non-empty one is used.
        if 'pdf' in form and form['pdf']:
            pdf_content = form['pdf']
            extracted = extract_from_pdf_string(pdf_content)
        elif 'arxiv' in form and form['arxiv']:
            extracted = extract_references_from_url_xml(
                make_arxiv_url(arxiv_id=form['arxiv']))
        elif 'url' in form and form['url']:
            source_url = form['url']
            extracted = extract_references_from_url_xml(source_url)
        elif 'txt' in form and form['txt'].value:
            raw_text = form['txt'].value
            extracted = extract_references_from_string_xml(
                raw_text.decode('utf-8', 'ignore'))
        else:
            extracted = None

        # Without any extracted references, display the submission form;
        # otherwise display the formatted result.
        if not extracted:
            content = docextract_templates.tmpl_web_form()
        else:
            content = docextract_templates.tmpl_web_result(
                format_record(0,
                              'hdref',
                              xml_record=extracted,
                              user_info=user_info))

        # Render the page (including header, footer)
        return page(title='References Extractor',
                    body=content,
                    uid=user_info['uid'],
                    req=req)
Пример #8
0
def _record_decoration(decoration, index):
    """Return the prefix/suffix/separator text for the record at *index*.

    A callable *decoration* is called with *index*; any other value is used
    as the literal text.
    """
    if callable(decoration):
        return decoration(index)
    return decoration


def format_records(recIDs, of, ln=None, verbose=0, search_pattern=None,
                   xml_records=None, user_info=None, record_prefix=None,
                   record_separator=None, record_suffix=None, prologue="",
                   epilogue="", req=None, on_the_fly=False):
    """
    Format records given by a list of record IDs or a list of records as xml.

    Add a prefix before each record, a suffix after each record, plus a
    separator between records.

    Also add optional prologue and epilogue to the complete formatted list.

    You can either specify a list of record IDs to format, or a list of xml
    records, but not both (if both are specified recIDs is ignored).

    'record_separator' is either a string used between all formatted records,
    or a function returning such a string.  The function must take a single
    integer parameter: the index in recIDs (or xml_records) of the record
    that has just been formatted.  For example separator(i) must return the
    separator between recID[i] and recID[i+1].  No separator is emitted after
    the last record.  The same string-or-function rule applies to
    'record_prefix' and 'record_suffix'.

    'req' is an optional parameter on which the output is written as it is
    produced (prologue, then each piece of each record, then epilogue) if it
    is given. Note that you should set 'req' content-type by yourself, and
    send http header before calling this function as it will not do it.

    This function takes the same parameters as :meth:`format_record` except
    for:

    :param recIDs: a list of record IDs
    :type recIDs: list(int)
    :param of: an output format code (or short identifier for the output
               format)
    :type of: string
    :param ln: the language to use to format the record; falls back to
               ``cfg['CFG_SITE_LANG']`` when empty
    :type ln: string
    :param verbose: the level of verbosity from 0 to 9.
                    - 0: silent
                    - 5: errors
                    - 7: errors and warnings, stop if error in format elements
                    - 9: errors and warnings, stop if error (debug mode)
    :type verbose: int
    :param search_pattern: list of strings representing the user request in web
                           interface
    :type search_pattern: list(string)
    :param user_info: the information of the user who will view the formatted
                      page (if applicable)
    :param xml_records: a list of xml string representations of the records to
                        format
    :type xml_records: list(string)
    :param record_prefix: a string (or function returning one) printed before
                          each formatted record (n times)
    :type record_prefix: string or function
    :param record_suffix: a string (or function returning one) printed after
                          each formatted record (n times)
    :type record_suffix: string or function
    :param prologue: a string printed at the beginning of the complete
                     formatted records (1x)
    :type prologue: string
    :param epilogue: a string printed at the end of the complete formatted
                     output (1x)
    :type epilogue: string
    :param record_separator: either a string or a function that returns string
                             to join formatted records
    :type record_separator: string or function
    :param req: an optional request object where to print records
    :param on_the_fly: if False, try to return an already preformatted version
                       of the record in the database
    :type on_the_fly: boolean
    :return: prologue, all formatted records (with their prefixes, suffixes
             and separators) and epilogue, concatenated
    :rtype: string
    """
    chunks = []

    def emit(text):
        # Keep the piece for the return value and stream it when requested.
        chunks.append(text)
        if req is not None:
            req.write(text)

    if req is not None:
        req.write(prologue)

    # Fill the missing list with Nones.  Real lists are built (instead of
    # ``map``) so that ``len()`` and indexing also work where ``map`` returns
    # an iterator.
    if xml_records is not None:
        recIDs = [None] * len(xml_records)
    else:
        xml_records = [None] * len(recIDs)

    last_index = len(recIDs) - 1
    for i, (recID, xml_record) in enumerate(zip(recIDs, xml_records)):
        if record_prefix is not None:
            emit(_record_decoration(record_prefix, i))

        # The language is resolved inside the loop so that an empty batch
        # never needs to look at the site configuration.
        ln = ln or cfg['CFG_SITE_LANG']
        emit(format_record(recID, of, ln, verbose, search_pattern,
                           xml_record, user_info, on_the_fly))

        if record_suffix is not None:
            emit(_record_decoration(record_suffix, i))

        # Separators go between records only, never after the last one.
        if record_separator is not None and i != last_index:
            emit(_record_decoration(record_separator, i))

    if req is not None:
        req.write(epilogue)

    return prologue + ''.join(chunks) + epilogue