Пример #1
0
   def add_url(self,
               url,
               lastmod=datetime(1900, 1, 1),
               changefreq="",
               priority="",
               alternate=False):
       """Write one <url> node to the sitemap and count it.

       <loc> holds the canonical form of ``url``.  <lastmod>,
       <changefreq> and <priority> are added only when the matching
       argument is truthy; when ``alternate`` is true, one xhtml:link
       element is added per alternate-language URL.

       Returns the number of url nodes in sitemap.
       """
       self.num_urls += 1
       canonical_url, alternate_urls = get_canonical_and_alternates_urls(
           url, drop_ln=not alternate)

       # Child elements that follow <loc>, accumulated in output order.
       optional = ''
       if lastmod:
           timestamp_format = '%Y-%m-%dT%H:%M:%S' + DEFAULT_TIMEZONE
           optional += u"\n   <lastmod>%s</lastmod>" % lastmod.strftime(
               timestamp_format)
       if changefreq:
           optional += u"\n   <changefreq>%s</changefreq>" % changefreq
       if priority:
           optional += u"\n   <priority>%s</priority>" % priority
       if alternate:
           for language, alternate_url in alternate_urls.iteritems():
               hreflang = language.replace('_', '-')  # zh_CN -> zh-CN
               optional += (
                   u'\n   <xhtml:link rel="alternate" hreflang="%s" href="%s" />'
                   % (hreflang, encode_for_xml(alternate_url, quote=True)))

       url_node = u"\n <url>\n   <loc>%s</loc>%s\n </url>" % (
           encode_for_xml(canonical_url), optional)
       # The running size is counted on the node text as written here.
       self.file_size += len(url_node)
       self.filedescriptor.write(url_node)
       return self.num_urls
   def add_url(self, url, lastmod=datetime(1900, 1, 1), changefreq="", priority="", alternate=False):
       """Write one <url> node to the sitemap and count it.

       <loc> holds the canonical form of ``url``.  <lastmod>,
       <changefreq> and <priority> are added only when the matching
       argument is truthy; when ``alternate`` is true, one xhtml:link
       element is added per alternate-language URL.

       Returns the number of url nodes in sitemap.
       """
       self.num_urls += 1
       canonical_url, alternate_urls = get_canonical_and_alternates_urls(url, drop_ln=not alternate)

       # Child elements that follow <loc>, accumulated in output order.
       optional = ''
       if lastmod:
           timestamp_format = '%Y-%m-%dT%H:%M:%S' + DEFAULT_TIMEZONE
           optional += u"\n   <lastmod>%s</lastmod>" % lastmod.strftime(timestamp_format)
       if changefreq:
           optional += u"\n   <changefreq>%s</changefreq>" % changefreq
       if priority:
           optional += u"\n   <priority>%s</priority>" % priority
       if alternate:
           for language, alternate_url in alternate_urls.iteritems():
               hreflang = language.replace('_', '-')  # zh_CN -> zh-CN
               link_values = (hreflang, encode_for_xml(alternate_url, quote=True))
               optional += u'\n   <xhtml:link rel="alternate" hreflang="%s" href="%s" />' % link_values

       url_node = u"\n <url>\n   <loc>%s</loc>%s\n </url>" % (encode_for_xml(canonical_url), optional)
       # The running size is counted on the node text as written here.
       self.file_size += len(url_node)
       self.filedescriptor.write(url_node)
       return self.num_urls
Пример #3
0
def bibconvert_escape_lxml(dummy_ctx, value):
    """
    Bridge to lxml to escape the provided value.

    @param dummy_ctx: unused (presumably the evaluation context handed
        over by lxml -- confirm against the registration code)
    @param value: a string, an integer, an object exposing ``.text``,
        or a list of these, in which case only the first item is used
    @return: encode_for_xml() applied to the string form of the value,
        or None when the conversion fails (the error goes to stderr)
    """
    try:
        # A list is reduced to its first item, which is then converted
        # exactly like a bare value.
        if isinstance(value, list):
            value = value[0]

        if isinstance(value, str):
            string_value = value
        elif isinstance(value, (int, long)):
            string_value = str(value)
        else:
            string_value = value.text

        return encode_for_xml(string_value)

    except Exception as err:
        # Best effort: report the problem instead of propagating it.
        # The ``as`` spelling is accepted by Python 2.6+ and Python 3.
        sys.stderr.write("Error during formatting function evaluation: " + \
                         str(err) + \
                         '\n')
        return None
def assemble_caption(begin_line, begin_index, end_line, end_index, lines):
    """
    Take information about the caption of a picture and put it all together
    in a nice way.  If it spans multiple lines, put it on one line.  If it
    contains controlled characters, strip them out.  If it has tags we don't
    want to worry about, get rid of them, etc.

    @param: begin_line (int): the index of the line where the caption begins
    @param: begin_index (int): the index within the line where the caption
        begins
    @param: end_line (int): the index of the line where the caption ends
    @param: end_index (int): the index within the line where the caption ends
    @param: lines ([string, string, ...]): the line strings of the text

    @return: caption (string): the caption, nicely formatted and pieced together
    """

    # stuff we don't like
    label_head = "\\label{"

    # reassemble the caption
    if end_line > begin_line:
        # our caption spanned multiple lines
        caption = lines[begin_line][begin_index:]

        for included_line_index in range(begin_line + 1, end_line):
            caption = caption + " " + lines[included_line_index]

        caption = caption + " " + lines[end_line][:end_index]
        caption = caption.replace("\n", " ")
        caption = caption.replace("  ", " ")
    else:
        # it fit on one line
        caption = lines[begin_line][begin_index:end_index]

    # clean out a label tag, if there is one
    label_begin = caption.find(label_head)
    if label_begin > -1:
        # we know that our caption is only one line, so if there's a label
        # tag in it, it will be all on one line.  so we make up some args
        dummy_start, dummy_start_line, label_end, dummy_end = find_open_and_close_braces(0, label_begin, "{", [caption])
        caption = caption[:label_begin] + caption[label_end + 1 :]

    # clean out characters not allowed in MARCXML
    # not allowed: & < >
    try:
        caption = wash_for_utf8(caption)
        caption = encode_for_xml(caption.encode("utf-8", "xmlcharrefreplace"), wash=True)
    except Exception:
        # The encoding step is known to fail on some input (seen on
        # astro-ph/0601014); fall back to escaping the three reserved
        # characters by hand.  Only Exception is caught so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        sys.stderr.write(caption)
        sys.stderr.write(" cannot be processed\n")
        caption = caption.replace("&", "&amp;").replace("<", "&lt;")
        caption = caption.replace(">", "&gt;")

    caption = caption.strip()

    # drop one pair of enclosing braces, if the whole caption is wrapped
    if len(caption) > 1 and caption[0] == "{" and caption[-1] == "}":
        caption = caption[1:-1]

    return caption
Пример #5
0
def bibconvert_escape_lxml(dummy_ctx, value):
    """
    Bridge to lxml to escape the provided value.

    @param dummy_ctx: unused (presumably the evaluation context handed
        over by lxml -- confirm against the registration code)
    @param value: a string, an integer, an object exposing ``.text``,
        or a list of these, in which case only the first item is used
    @return: encode_for_xml() applied to the string form of the value,
        or None when the conversion fails (the error goes to stderr)
    """
    try:
        # A list is reduced to its first item, which is then converted
        # exactly like a bare value.
        if isinstance(value, list):
            value = value[0]

        if isinstance(value, str):
            string_value = value
        elif isinstance(value, (int, long)):
            string_value = str(value)
        else:
            string_value = value.text

        return encode_for_xml(string_value)

    except Exception as err:
        # Best effort: report the problem instead of propagating it.
        # The ``as`` spelling is accepted by Python 2.6+ and Python 3.
        sys.stderr.write("Error during formatting function evaluation: " + \
                         str(err) + \
                         '\n')
        return None
Пример #6
0
def assemble_caption(begin_line, begin_index, end_line, end_index, lines):
    """
    Take information about the caption of a picture and put it all together
    in a nice way.  If it spans multiple lines, put it on one line.  If it
    contains controlled characters, strip them out.  If it has tags we don't
    want to worry about, get rid of them, etc.

    @param: begin_line (int): the index of the line where the caption begins
    @param: begin_index (int): the index within the line where the caption
        begins
    @param: end_line (int): the index of the line where the caption ends
    @param: end_index (int): the index within the line where the caption ends
    @param: lines ([string, string, ...]): the line strings of the text

    @return: caption (string): the caption, nicely formatted and pieced together
    """

    # stuff we don't like
    label_head = '\\label{'

    # reassemble the caption
    if end_line > begin_line:
        # our caption spanned multiple lines
        caption = lines[begin_line][begin_index:]

        for included_line_index in range(begin_line + 1, end_line):
            caption = caption + ' ' + lines[included_line_index]

        caption = caption + ' ' + lines[end_line][:end_index]
        caption = caption.replace('\n', ' ')
        caption = caption.replace('  ', ' ')
    else:
        # it fit on one line
        caption = lines[begin_line][begin_index:end_index]

    # clean out a label tag, if there is one
    label_begin = caption.find(label_head)
    if label_begin > -1:
        # we know that our caption is only one line, so if there's a label
        # tag in it, it will be all on one line.  so we make up some args
        dummy_start, dummy_start_line, label_end, dummy_end = \
                find_open_and_close_braces(0, label_begin, '{', [caption])
        caption = caption[:label_begin] + caption[label_end + 1:]

    # clean out characters not allowed in MARCXML
    # not allowed: & < >
    try:
        caption = encode_for_xml(caption.encode('utf-8', 'xmlcharrefreplace'), wash=True)
    except Exception:
        # The encoding step is known to fail on some input (seen on
        # astro-ph/0601014); fall back to escaping the three reserved
        # characters by hand.  Only Exception is caught so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        sys.stderr.write(caption)
        sys.stderr.write(' cannot be processed\n')
        caption = caption.replace('&', '&amp;').replace('<', '&lt;')
        caption = caption.replace('>', '&gt;')

    caption = caption.strip()

    # drop one pair of enclosing braces, if the whole caption is wrapped
    if len(caption) > 1 and caption[0] == '{' and caption[-1] == '}':
        caption = caption[1:-1]

    return caption
Пример #7
0
def create_ill_record(book_info):
    """
    Create a new ILL record

    Builds a MARCXML <record> from the book details, writes it to a
    timestamped file under CFG_TMPDIR and submits that file through
    task_low_level_submission('bibupload', ...).

    @param book_info: book's information, unpacked as
        (title, author, place, publisher, year, edition, isbn)
    @type book_info: tuple

    @return: the MARCXML record text that was written to the file
    """

    (title, author, place, publisher, year, edition, isbn) = book_info

    ill_record = """
        <record>
            <datafield tag="020" ind1=" " ind2=" ">
                <subfield code="a">%(isbn)s</subfield>
            </datafield>
            <datafield tag="100" ind1=" " ind2=" ">
                <subfield code="a">%(author)s</subfield>
            </datafield>
            <datafield tag="245" ind1=" " ind2=" ">
                <subfield code="a">%(title)s</subfield>
            </datafield>
            <datafield tag="250" ind1=" " ind2=" ">
                <subfield code="a">%(edition)s</subfield>
            </datafield>
            <datafield tag="260" ind1=" " ind2=" ">
                <subfield code="a">%(place)s</subfield>
                <subfield code="b">%(publisher)s</subfield>
                <subfield code="c">%(year)s</subfield>
            </datafield>
            <datafield tag="980" ind1=" " ind2=" ">
                <subfield code="a">ILLBOOK</subfield>
            </datafield>
        </record>
  """ % {
        'isbn': encode_for_xml(isbn),
        'author': encode_for_xml(author),
        'title': encode_for_xml(title),
        'edition': encode_for_xml(edition),
        'place': encode_for_xml(place),
        'publisher': encode_for_xml(publisher),
        'year': encode_for_xml(year)
    }

    # NOTE(review): the name is only unique per second; two calls within
    # the same second target the same path.
    file_path = '%s/%s_%s.xml' % (CFG_TMPDIR, 'bibcirculation_ill_book',
                                  time.strftime("%Y%m%d_%H%M%S"))

    # try/finally guarantees the handle is closed even if write() fails.
    xml_file = open(file_path, 'w')
    try:
        xml_file.write(ill_record)
    finally:
        xml_file.close()

    # Pass XML file to BibUpload.
    task_low_level_submission('bibupload', 'bibcirculation', '-P', '5', '-i',
                              file_path)

    return ill_record
Пример #8
0
 def __new__(cls, original_string='', escape_quotes=False):
     """Create the str instance holding the escaped form of original_string.

     An EscapedString input is reused as is, a non-empty input made only
     of whitespace becomes '&nbsp;', and anything else goes through
     encode_for_xml(..., wash=True, quote=escape_quotes).  Both arguments
     are kept on the new object as attributes.
     """
     if isinstance(original_string, EscapedString):
         # already escaped: take its text without escaping again
         escaped_string = str(original_string)
     elif original_string and not str(original_string).strip():
         escaped_string = '&nbsp;'
     else:
         escaped_string = encode_for_xml(str(original_string), wash=True, quote=escape_quotes)

     instance = str.__new__(cls, escaped_string)
     instance.original_string = original_string
     instance.escape_quotes = escape_quotes
     return instance
Пример #9
0
 def __new__(cls, original_string='', escape_quotes=False):
     """Create the str instance holding the escaped form of original_string.

     An EscapedString input is reused as is, a non-empty input made only
     of whitespace becomes '&nbsp;', and anything else goes through
     encode_for_xml(..., wash=True, quote=escape_quotes).  Both arguments
     are kept on the new object as attributes.
     """
     if isinstance(original_string, EscapedString):
         # already escaped: take its text without escaping again
         escaped_string = str(original_string)
     elif original_string and not str(original_string).strip():
         escaped_string = '&nbsp;'
     else:
         escaped_string = encode_for_xml(str(original_string), wash=True, quote=escape_quotes)

     instance = str.__new__(cls, escaped_string)
     instance.original_string = original_string
     instance.escape_quotes = escape_quotes
     return instance
Пример #10
0
def create_ill_record(book_info):
    """
    Create a new ILL record

    Builds a MARCXML <record> from the book details, writes it to a
    timestamped file under CFG_TMPDIR and submits that file through
    task_low_level_submission("bibupload", ...).

    @param book_info: book's information, unpacked as
        (title, author, place, publisher, year, edition, isbn)
    @type book_info: tuple

    @return: the MARCXML record text that was written to the file
    """

    (title, author, place, publisher, year, edition, isbn) = book_info

    ill_record = """
        <record>
            <datafield tag="020" ind1=" " ind2=" ">
                <subfield code="a">%(isbn)s</subfield>
            </datafield>
            <datafield tag="100" ind1=" " ind2=" ">
                <subfield code="a">%(author)s</subfield>
            </datafield>
            <datafield tag="245" ind1=" " ind2=" ">
                <subfield code="a">%(title)s</subfield>
            </datafield>
            <datafield tag="250" ind1=" " ind2=" ">
                <subfield code="a">%(edition)s</subfield>
            </datafield>
            <datafield tag="260" ind1=" " ind2=" ">
                <subfield code="a">%(place)s</subfield>
                <subfield code="b">%(publisher)s</subfield>
                <subfield code="c">%(year)s</subfield>
            </datafield>
            <datafield tag="980" ind1=" " ind2=" ">
                <subfield code="a">ILLBOOK</subfield>
            </datafield>
        </record>
  """ % {
        "isbn": encode_for_xml(isbn),
        "author": encode_for_xml(author),
        "title": encode_for_xml(title),
        "edition": encode_for_xml(edition),
        "place": encode_for_xml(place),
        "publisher": encode_for_xml(publisher),
        "year": encode_for_xml(year),
    }

    # NOTE(review): the name is only unique per second; two calls within
    # the same second target the same path.
    file_path = "%s/%s_%s.xml" % (CFG_TMPDIR, "bibcirculation_ill_book", time.strftime("%Y%m%d_%H%M%S"))

    # try/finally guarantees the handle is closed even if write() fails.
    xml_file = open(file_path, "w")
    try:
        xml_file.write(ill_record)
    finally:
        xml_file.close()

    # Pass XML file to BibUpload.
    task_low_level_submission("bibupload", "bibcirculation", "-P", "5", "-i", file_path)

    return ill_record
Пример #11
0
def create_ill_record(book_info):
    """
    Create a new ILL record

    Builds a MARCXML <record> from the book details, writes it to a
    timestamped file under CFG_TMPDIR and submits that file through
    task_low_level_submission('bibupload', ...).

    @param book_info: book's information, unpacked as
        (title, author, place, publisher, year, edition, isbn)
    @type book_info: tuple

    @return: the MARCXML record text that was written to the file
    """

    (title, author, place, publisher, year, edition, isbn) = book_info

    ill_record = """
        <record>
            <datafield tag="020" ind1=" " ind2=" ">
                <subfield code="a">%(isbn)s</subfield>
            </datafield>
            <datafield tag="100" ind1=" " ind2=" ">
                <subfield code="a">%(author)s</subfield>
            </datafield>
            <datafield tag="245" ind1=" " ind2=" ">
                <subfield code="a">%(title)s</subfield>
            </datafield>
            <datafield tag="250" ind1=" " ind2=" ">
                <subfield code="a">%(edition)s</subfield>
            </datafield>
            <datafield tag="260" ind1=" " ind2=" ">
                <subfield code="a">%(place)s</subfield>
                <subfield code="b">%(publisher)s</subfield>
                <subfield code="c">%(year)s</subfield>
            </datafield>
            <datafield tag="980" ind1=" " ind2=" ">
                <subfield code="a">ILLBOOK</subfield>
            </datafield>
        </record>
  """ % {'isbn':      encode_for_xml(isbn),
         'author':    encode_for_xml(author),
         'title':     encode_for_xml(title),
         'edition':   encode_for_xml(edition),
         'place':     encode_for_xml(place),
         'publisher': encode_for_xml(publisher),
         'year':      encode_for_xml(year)}

    # NOTE(review): the name is only unique per second; two calls within
    # the same second target the same path.
    file_path = '%s/%s_%s.xml' % (CFG_TMPDIR, 'bibcirculation_ill_book',
                                  time.strftime("%Y%m%d_%H%M%S"))

    # try/finally guarantees the handle is closed even if write() fails.
    xml_file = open(file_path, 'w')
    try:
        xml_file.write(ill_record)
    finally:
        xml_file.close()

    # Pass XML file to BibUpload.
    task_low_level_submission('bibupload', 'bibcirculation',
                              '-P', '5', '-i', file_path)

    return ill_record
Пример #12
0
def bibconvert_escape_libxslt(dummy_ctx, value):
    """
    Bridge to libxslt to escape the provided value.

    @param dummy_ctx: unused (presumably the evaluation context handed
        over by libxslt -- confirm against the registration code)
    @param value: a string, an integer, or an indexable object whose
        first item is wrapped in a libxml2.xmlNode and serialized
    @return: encode_for_xml() applied to the string form of the value,
        or None when the conversion fails (the error goes to stderr)
    """
    try:
        if isinstance(value, str):
            string_value = value
        elif isinstance(value, (int, long)):
            string_value = str(value)
        else:
            # only the first item is serialized
            string_value = libxml2.xmlNode(_obj=value[0]).serialize("utf8")

        return encode_for_xml(string_value)

    except Exception as err:
        # Best effort: report the problem instead of propagating it.
        # The ``as`` spelling is accepted by Python 2.6+ and Python 3.
        sys.stderr.write("Error during formatting function evaluation: " + str(err) + "\n")
        return None
Пример #13
0
def format_element(bfo, type='xml', encodeForXML='yes'):
    """
    Prints the complete current record as XML.

    @param type: the type of xml. Can be 'xml', 'oai_dc', 'marcxml', 'xd'
    @param encodeForXML: if 'yes' (compared case-insensitively), the XML is passed through encode_for_xml before being returned
    """
    from invenio.bibformat_utils import record_get_xml
    from invenio.textutils import encode_for_xml

    # The 'type' argument selects the XML flavour to generate.
    record_xml = record_get_xml(bfo.recID, format=type, on_the_fly=True)

    if encodeForXML.lower() != 'yes':
        return record_xml
    return encode_for_xml(record_xml)
Пример #14
0
def format_element(bfo, type='xml', encodeForXML='yes'):
    """
    Prints the complete current record as XML.

    @param type: the type of xml. Can be 'xml', 'oai_dc', 'marcxml', 'xd'
    @param encodeForXML: if 'yes' (compared case-insensitively), the XML is passed through encode_for_xml before being returned
    """
    from invenio.bibformat_utils import record_get_xml
    from invenio.textutils import encode_for_xml

    # The 'type' argument selects the XML flavour to generate.
    record_xml = record_get_xml(bfo.recID, format=type, on_the_fly=True)

    if encodeForXML.lower() != 'yes':
        return record_xml
    return encode_for_xml(record_xml)
Пример #15
0
def bibconvert_escape_libxslt(dummy_ctx, value):
    """
    Bridge to libxslt to escape the provided value.

    @param dummy_ctx: unused (presumably the evaluation context handed
        over by libxslt -- confirm against the registration code)
    @param value: a string, an integer, or an indexable object whose
        first item is wrapped in a libxml2.xmlNode and serialized
    @return: encode_for_xml() applied to the string form of the value,
        or None when the conversion fails (the error goes to stderr)
    """
    try:
        if isinstance(value, str):
            string_value = value
        elif isinstance(value, (int, long)):
            string_value = str(value)
        else:
            # only the first item is serialized
            string_value = libxml2.xmlNode(_obj=value[0]).serialize('utf8')

        return encode_for_xml(string_value)

    except Exception as err:
        # Best effort: report the problem instead of propagating it.
        # The ``as`` spelling is accepted by Python 2.6+ and Python 3.
        sys.stderr.write("Error during formatting function evaluation: " + \
                         str(err) + \
                         '\n')
        return None
Пример #16
0
def _output_marc(skw_matches, ckw_matches, author_keywords, acronyms, spires=False,
                 kw_field=bconfig.CFG_MAIN_FIELD, auth_field=bconfig.CFG_AUTH_FIELD,
                 acro_field=bconfig.CFG_ACRON_FIELD, provenience='BibClassify'):
    """Outputs the keywords in the MARCXML format.

    One <datafield> is produced per keyword; the fragments are returned
    concatenated in the order: single keywords, composite keywords,
    author keywords, acronyms.

    @var skw_matches: list of single keywords
    @var ckw_matches: list of composite keywords
    @var author_keywords: dictionary of extracted author keywords
    @var acronyms: dictionary of acronyms
    @var spires: boolean, True=generate spires output - BUT NOTE: it is
            here only not to break compatibility, in fact spires output
            should never be used for xml because if we read marc back
            into the KeywordToken objects, we would not find them
    @keyword provenience: string that identifies source (authority) that
        assigned the contents of the field
    @return: string, formatted MARC"""

    datafield_template = ('<datafield tag="%s" ind1="%s" ind2="%s">\n'
                          '    <subfield code="2">%s</subfield>\n'
                          '    <subfield code="a">%s</subfield>\n'
                          '    <subfield code="n">%s</subfield>\n'
                          '    <subfield code="9">%s</subfield>\n'
                          '</datafield>\n')

    fragments = []

    # Single and composite keywords all go to the main keyword field.
    tag, ind1, ind2 = _parse_marc_code(kw_field)
    for matches in (skw_matches, ckw_matches):
        if not matches or not len(matches):
            continue
        for keyword, info in matches:
            values = (tag, ind1, ind2,
                      encode_for_xml(provenience),
                      encode_for_xml(keyword.output(spires)),
                      len(info[0]),
                      encode_for_xml(keyword.getType()))
            fragments.append(datafield_template % values)

    for field, mapping in ((auth_field, author_keywords), (acro_field, acronyms)):
        # field='' we shall not save the keywords
        if not mapping or not len(mapping) or not field:
            continue
        tag, ind1, ind2 = _parse_marc_code(field)
        # NOTE(review): each key is passed to encode_for_xml() as text and
        # also asked for getType(); confirm what type these keys have.
        for keyword, _unused_info in mapping.items():
            values = (tag, ind1, ind2,
                      encode_for_xml(provenience),
                      encode_for_xml(keyword),
                      '',
                      encode_for_xml(keyword.getType()))
            fragments.append(datafield_template % values)

    return "".join(fragments)
Пример #17
0
def _output_marc(skw_matches, ckw_matches, author_keywords, acronyms, spires=False,
                 kw_field=bconfig.CFG_MAIN_FIELD, auth_field=bconfig.CFG_AUTH_FIELD,
                 acro_field=bconfig.CFG_ACRON_FIELD, provenience='BibClassify'):
    """Outputs the keywords in the MARCXML format.

    One <datafield> is produced per keyword; the fragments are returned
    concatenated in the order: single keywords, composite keywords,
    author keywords, acronyms.

    @var skw_matches: list of single keywords
    @var ckw_matches: list of composite keywords
    @var author_keywords: dictionary of extracted author keywords
    @var acronyms: dictionary of acronyms
    @var spires: boolean, True=generate spires output - BUT NOTE: it is
            here only not to break compatibility, in fact spires output
            should never be used for xml because if we read marc back
            into the KeywordToken objects, we would not find them
    @keyword provenience: string that identifies source (authority) that
        assigned the contents of the field
    @return: string, formatted MARC"""

    datafield_template = ('<datafield tag="%s" ind1="%s" ind2="%s">\n'
                          '    <subfield code="2">%s</subfield>\n'
                          '    <subfield code="a">%s</subfield>\n'
                          '    <subfield code="n">%s</subfield>\n'
                          '    <subfield code="9">%s</subfield>\n'
                          '</datafield>\n')

    fragments = []

    # Single and composite keywords all go to the main keyword field.
    tag, ind1, ind2 = _parse_marc_code(kw_field)
    for matches in (skw_matches, ckw_matches):
        if not matches or not len(matches):
            continue
        for keyword, info in matches:
            values = (tag, ind1, ind2,
                      encode_for_xml(provenience),
                      encode_for_xml(keyword.output(spires)),
                      len(info[0]),
                      encode_for_xml(keyword.getType()))
            fragments.append(datafield_template % values)

    for field, mapping in ((auth_field, author_keywords), (acro_field, acronyms)):
        # field='' we shall not save the keywords
        if not mapping or not len(mapping) or not field:
            continue
        tag, ind1, ind2 = _parse_marc_code(field)
        # NOTE(review): each key is passed to encode_for_xml() as text and
        # also asked for getType(); confirm what type these keys have.
        for keyword, _unused_info in mapping.items():
            values = (tag, ind1, ind2,
                      encode_for_xml(provenience),
                      encode_for_xml(keyword),
                      '',
                      encode_for_xml(keyword.getType()))
            fragments.append(datafield_template % values)

    return "".join(fragments)
Пример #18
0
def print_record(sysno, format='marcxml', record_exists_result=None):
    """Prints record 'sysno' formatted according to 'format'.

    Despite the name, nothing is printed: the <record> element is built
    as a string and returned.

    - if record does not exist, return nothing (None).

    - if record has been deleted and CFG_OAI_DELETED_POLICY is
      'persistent' or 'transient', then return only header, with status
      'deleted'.

    - if record has been deleted and CFG_OAI_DELETED_POLICY has any
      other value, then return nothing (None).

    - 'dc' and 'oai_dc' are treated as format 'xd'; only 'marcxml' and
      'xd' produce metadata content, any other format yields an empty
      <metadata> element.

    Optional parameter 'record_exists_result' has the value of the result
    of the record_exists(sysno) function (in order not to call that function
    again if already done.)
    """

    out = ""

    # sanity check:
    if record_exists_result is not None:
        _record_exists = record_exists_result
    else:
        _record_exists = record_exists(sysno)

    # falsy result: nothing to output
    if not _record_exists:
        return

    if (format == "dc") or (format == "oai_dc"):
        format = "xd"

    # print record opening tags:

    out = out + "  <record>\n"

    # -1 is the value this function treats as "record deleted"
    if _record_exists == -1:  # Deleted?
        if CFG_OAI_DELETED_POLICY == "persistent" or \
               CFG_OAI_DELETED_POLICY == "transient":
            out = out + "    <header status=\"deleted\">\n"
        else:
            return
    else:
        out = out + "   <header>\n"

    # header content: identifiers, datestamp and non-empty set specs
    for ident in get_field(sysno, CFG_OAI_ID_FIELD):
        out = "%s    <identifier>%s</identifier>\n" % (out,
                                                       escape_space(ident))
    out = "%s    <datestamp>%s</datestamp>\n" % (out,
                                                 get_modification_date(sysno))
    for set in get_field(sysno, CFG_OAI_SET_FIELD):
        if set:
            # Print only if field not empty
            out = "%s    <setSpec>%s</setSpec>\n" % (out, set)
    out = out + "   </header>\n"

    # deleted records carry a header only, no <metadata>
    if _record_exists == -1:  # Deleted?
        pass
    else:
        out = out + "   <metadata>\n"

        if format == "marcxml":
            formatted_record = get_preformatted_record(sysno, 'xm')
            if formatted_record is not None:
                ## MARCXML is already preformatted. Adapt it if needed
                # Infoscience modification :
                # Added custom validator from Swiss librarians
                # The plain textual replacements below move every element
                # into the marc: namespace and insert a fixed leader.
                formatted_record = formatted_record.replace(
                    "<record>",
                    "<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://ead.nb.admin.ch/web/standards/slb/MARC21/MARC21slim.xsd\" type=\"Bibliographic\">\n     <marc:leader>00000coc  2200000uu 4500</marc:leader>"
                )
                formatted_record = formatted_record.replace(
                    "<record xmlns=\"http://www.loc.gov/MARC21/slim\">",
                    "<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://ead.nb.admin.ch/web/standards/slb/MARC21/MARC21slim.xsd\" type=\"Bibliographic\">\n     <marc:leader>00000coc  2200000uu 4500</marc:leader>"
                )
                formatted_record = formatted_record.replace(
                    "</record", "</marc:record")
                formatted_record = formatted_record.replace(
                    "<controlfield", "<marc:controlfield")
                formatted_record = formatted_record.replace(
                    "</controlfield", "</marc:controlfield")
                formatted_record = formatted_record.replace(
                    "<datafield", "<marc:datafield")
                formatted_record = formatted_record.replace(
                    "</datafield", "</marc:datafield")
                formatted_record = formatted_record.replace(
                    "<subfield", "<marc:subfield")
                formatted_record = formatted_record.replace(
                    "</subfield", "</marc:subfield")
                out += formatted_record
            else:
                ## MARCXML is not formatted in the database, so produce it.
                # Infoscience modification
                out = out + "    <marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://ead.nb.admin.ch/web/standards/slb/MARC21/MARC21slim.xsd\" type=\"Bibliographic\">"
                out = out + "     <marc:leader>00000coc  2200000uu 4500</marc:leader>"
                out = "%s     <marc:controlfield tag=\"001\">%d</marc:controlfield>\n" % (
                    out, int(sysno))

                # Walk the hundred table pairs bib00x/bibrec_bib00x ..
                # bib99x/bibrec_bib99x, one query per pair.
                for digit1 in range(0, 10):
                    for digit2 in range(0, 10):
                        bibbx = "bib%d%dx" % (digit1, digit2)
                        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
                        # Only the table names are interpolated here; the
                        # doubled %%s survive as placeholders for run_sql.
                        query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
                                "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\
                                "ORDER BY bb.field_number, b.tag ASC" % (bibbx, bibx)
                        res = run_sql(query,
                                      (sysno, '%d%d%%' % (digit1, digit2)))
                        # -999 marks "no field opened yet" for this table
                        field_number_old = -999
                        field_old = ""
                        for row in res:
                            field, value, field_number = row[0], row[1], row[2]
                            # characters 3 and 4 of the stored tag are the
                            # indicators; "_" stands for a blank indicator
                            ind1, ind2 = field[3], field[4]
                            if ind1 == "_":
                                ind1 = " "
                            if ind2 == "_":
                                ind2 = " "
                            # print field tag
                            # A new element starts when the field number
                            # changes or when the tag differs in anything
                            # but its last character (the subfield code).
                            if field_number != field_number_old or field[:
                                                                         -1] != field_old[:
                                                                                          -1]:
                                if format == "marcxml":

                                    # close the previously opened element
                                    if field_number_old != -999:
                                        if field_old[0:2] == "00":
                                            out = out + "     </marc:controlfield>\n"
                                        else:
                                            out = out + "     </marc:datafield>\n"

                                    # tags starting with "00" are control
                                    # fields, all others are data fields
                                    if field[0:2] == "00":
                                        out = "%s     <marc:controlfield tag=\"%s\">\n" % (
                                            out, encode_for_xml(field[0:3]))
                                    else:
                                        out = "%s     <marc:datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">\n" % (
                                            out, encode_for_xml(field[0:3]),
                                            encode_for_xml(ind1).lower(),
                                            encode_for_xml(ind2).lower())

                                field_number_old = field_number
                                field_old = field
                            # print subfield value
                            if format == "marcxml":
                                value = encode_for_xml(value)

                                if (field[0:2] == "00"):
                                    out = "%s      %s\n" % (out, value)
                                else:
                                    out = "%s      <marc:subfield code=\"%s\">%s</marc:subfield>\n" % (
                                        out, encode_for_xml(field[-1:]), value)

                            # fetch next subfield
                        # all fields/subfields printed in this run, so close the tag:
                        if (format == "marcxml") and field_number_old != -999:
                            if field_old[0:2] == "00":
                                out = out + "     </marc:controlfield>\n"
                            else:
                                out = out + "     </marc:datafield>\n"

                out = out + "    </marc:record>\n"

        elif format == "xd":
            out += format_record(sysno, 'xoaidc')

    # print record closing tags:

        out = out + "   </metadata>\n"

    out = out + "  </record>\n"

    return out
def print_record(sysno, format='marcxml', record_exists_result=None):
    """Return the OAI <record> element for record 'sysno' as a string.

    - If the record does not exist, nothing is returned (None).

    - If the record is deleted and CFG_OAI_DELETED_POLICY is
      'persistent' or 'transient', only the header is returned, with
      status 'deleted'.

    - If the record is deleted and CFG_OAI_DELETED_POLICY has any other
      value, nothing is returned (None).

    'format' selects the metadata flavour: 'marcxml', or 'xd' (the
    values 'dc' and 'oai_dc' are treated as 'xd').  Any other value
    yields an empty <metadata> element.

    Optional parameter 'record_exists_result' carries the result of a
    previous record_exists(sysno) call so that it is not called again.
    """

    def closing_tag(tag):
        """Return the closing line matching the element opened for 'tag'."""
        if tag[0:2] == "00":
            return "     </marc:controlfield>\n"
        return "     </marc:datafield>\n"

    # sanity check: reuse the caller's answer when there is one
    if record_exists_result is None:
        exists_status = record_exists(sysno)
    else:
        exists_status = record_exists_result

    if not exists_status:
        return

    is_deleted = (exists_status == -1)
    if is_deleted and CFG_OAI_DELETED_POLICY not in ("persistent",
                                                     "transient"):
        return

    if format in ("dc", "oai_dc"):
        format = "xd"

    # record and header opening tags
    result = "  <record>\n"
    if is_deleted:
        result += "    <header status=\"deleted\">\n"
    else:
        result += "   <header>\n"

    for oai_id in get_field(sysno, CFG_OAI_ID_FIELD):
        result += "    <identifier>%s</identifier>\n" % (escape_space(oai_id),)
    result += "    <datestamp>%s</datestamp>\n" % (get_modification_date(sysno),)
    for set_spec in get_field(sysno, CFG_OAI_SET_FIELD):
        if set_spec:
            # Print only if field not empty
            result += "    <setSpec>%s</setSpec>\n" % (set_spec,)
    result += "   </header>\n"

    if is_deleted:
        # deleted records carry a header only
        return result + "  </record>\n"

    result += "   <metadata>\n"

    if format == "marcxml":
        marc_record_open = "<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\" type=\"Bibliographic\">"
        marc_leader = "     <marc:leader>00000coc  2200000uu 4500</marc:leader>"

        preformatted = get_preformatted_record(sysno, 'xm')
        if preformatted is not None:
            ## MARCXML is already preformatted. Adapt it if needed:
            ## prefix every element with the 'marc:' namespace.  The
            ## order of the substitutions is significant.
            substitutions = (
                ("<record>", marc_record_open + "\n" + marc_leader),
                ("<record xmlns=\"http://www.loc.gov/MARC21/slim\">",
                 marc_record_open + "\n" + marc_leader),
                ("</record", "</marc:record"),
                ("<controlfield", "<marc:controlfield"),
                ("</controlfield", "</marc:controlfield"),
                ("<datafield", "<marc:datafield"),
                ("</datafield", "</marc:datafield"),
                ("<subfield", "<marc:subfield"),
                ("</subfield", "</marc:subfield"),
            )
            for plain, namespaced in substitutions:
                preformatted = preformatted.replace(plain, namespaced)
            result += preformatted
        else:
            ## MARCXML is not formatted in the database, so produce it.
            result += "    " + marc_record_open
            result += marc_leader
            result += "     <marc:controlfield tag=\"001\">%d</marc:controlfield>\n" % (int(sysno),)

            # walk the tables bib00x .. bib99x in order
            for table_no in range(100):
                digits = "%02d" % table_no
                query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
                        "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\
                        "ORDER BY bb.field_number, b.tag ASC" % (
                            "bib%sx" % digits, "bibrec_bib%sx" % digits)
                rows = run_sql(query, (sysno, digits + "%"))

                # -999 means "no field opened yet in this table"
                previous_number = -999
                previous_tag = ""
                for row in rows:
                    tag, value, field_number = row[0], row[1], row[2]
                    ind1, ind2 = tag[3], tag[4]
                    if ind1 == "_":
                        ind1 = " "
                    if ind2 == "_":
                        ind2 = " "

                    # a new field starts when the field number or the
                    # tag (ignoring the subfield code) changes
                    if field_number != previous_number or \
                           tag[:-1] != previous_tag[:-1]:
                        if previous_number != -999:
                            result += closing_tag(previous_tag)

                        if tag[0:2] == "00":
                            result += "     <marc:controlfield tag=\"%s\">\n" % (
                                encode_for_xml(tag[0:3]),)
                        else:
                            result += "     <marc:datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">\n" % (
                                encode_for_xml(tag[0:3]),
                                encode_for_xml(ind1).lower(),
                                encode_for_xml(ind2).lower())

                        previous_number = field_number
                        previous_tag = tag

                    # print the value itself
                    value = encode_for_xml(value)
                    if tag[0:2] == "00":
                        result += "      %s\n" % (value,)
                    else:
                        result += "      <marc:subfield code=\"%s\">%s</marc:subfield>\n" % (
                            encode_for_xml(tag[-1:]), value)

                # all fields/subfields of this table printed, so close
                # the last opened tag:
                if previous_number != -999:
                    result += closing_tag(previous_tag)

            result += "    </marc:record>\n"

    elif format == "xd":
        result += format_record(sysno, 'xoaidc')

    # metadata and record closing tags
    result += "   </metadata>\n"
    result += "  </record>\n"

    return result
Пример #20
0
 def encode_for_marcxml(value):
     """Return str(value) passed through invenio.textutils.encode_for_xml.

     Unicode input is encoded to UTF-8 first: under Python 2, str() on a
     unicode object containing non-ASCII characters raises
     UnicodeEncodeError.
     """
     from invenio.textutils import encode_for_xml
     if isinstance(value, unicode):
         value = value.encode('utf8')
     return encode_for_xml(str(value))
Пример #21
0
                dir=CFG_TMPSHAREDDIR)
            shutil.copy(os.path.join(folder, docfile), tmp_file)

            # Create MARC temporary file with FFT tag and call bibupload
            (fd, filename) = tempfile.mkstemp(prefix=identifier + '_',
                                              dir=CFG_TMPSHAREDDIR)
            filedesc = os.fdopen(fd, 'w')
            marc_content = """ <record>
                                    <controlfield tag="001">%(rec_id)s</controlfield>
                                        <datafield tag="FFT" ind1=" " ind2=" ">
                                            <subfield code="n">%(name)s</subfield>
                                            <subfield code="a">%(path)s</subfield>
                                        </datafield>
                               </record> """ % {
                'rec_id': rec_id,
                'name': encode_for_xml(identifier),
                'path': encode_for_xml(tmp_file),
            }
            filedesc.write(marc_content)
            filedesc.close()
            info[1].append(docfile)
            user = ""
            if req is not None:
                user_info = collect_user_info(req)
                user = user_info['nickname']
            if not user:
                user = "******"
            # Execute bibupload with the appropiate mode

            task_arguments = ('bibupload', user, "--" + mode,
                              "--priority=" + priority, "-N", "batchupload")
Пример #22
0
def record_get_xml(recID, format='xm', decompress=zlib.decompress,
                   on_the_fly=False):
    """
    Returns an XML string of the record given by recID.

    The function builds the XML directly from the database,
    without using the standard formatting process.

    'format' allows to define the flavour of XML:
        - 'xm' for standard XML
        - 'marcxml' for MARC XML
        - 'oai_dc' for OAI Dublin Core
        - 'xd' for XML Dublin Core

    If record does not exist, returns empty string.
    If the record is deleted, returns an empty MARCXML (with recid
    controlfield, OAI ID fields and 980__c=DELETED)

    All values (record ID, tag, format) are handed to run_sql as bound
    parameters instead of being interpolated into the SQL text; only
    the bibXXx table names, which cannot be bound, are formatted in.

    @param recID: the id of the record to retrieve
    @param format: the XML flavour, see above
    @param decompress: function applied to the cached value read from
        the bibfmt table
    @param on_the_fly: if false, try to fetch precreated one in database
    @return: the xml string of the record
    """
    from invenio.search_engine import record_exists

    def get_fieldvalues(recID, tag):
        """Return list of field values for field TAG inside record RECID."""
        if tag == "001___":
            # we have asked for recID that is not stored in bibXXx tables
            return [str(recID)]
        # we are going to look inside bibXXx tables
        digit = tag[0:2]
        query = "SELECT bx.value FROM bib%sx AS bx, bibrec_bib%sx AS bibx " \
                "WHERE bibx.id_bibrec=%%s AND bx.id=bibx.id_bibxxx " \
                "AND bx.tag LIKE %%s " \
                "ORDER BY bibx.field_number, bx.tag ASC" % (digit, digit)
        return [row[0] for row in run_sql(query, (recID, tag))]

    def get_creation_date(recID, fmt="%Y-%m-%d"):
        "Returns the creation date of the record 'recID'."
        out = ""
        res = run_sql("SELECT DATE_FORMAT(creation_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1)
        if res:
            out = res[0][0]
        return out

    def get_modification_date(recID, fmt="%Y-%m-%d"):
        "Returns the date of last modification for the record 'recID'."
        out = ""
        res = run_sql("SELECT DATE_FORMAT(modification_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1)
        if res:
            out = res[0][0]
        return out

    out = ""

    # sanity check:
    record_exist_p = record_exists(recID)
    if record_exist_p == 0: # doesn't exist
        return out

    # these two flavours are wrapped in an OAI <record> envelope
    oai_envelope = (format == "marcxml" or format == "oai_dc")

    # print record opening tags, if needed:
    if oai_envelope:
        out += "  <record>\n"
        out += "   <header>\n"
        for identifier in get_fieldvalues(recID, CFG_OAI_ID_FIELD):
            out += "    <identifier>%s</identifier>\n" % identifier
        out += "    <datestamp>%s</datestamp>\n" % get_modification_date(recID)
        out += "   </header>\n"
        out += "   <metadata>\n"

    if format.startswith("xm") or format == "marcxml":
        res = None
        if not on_the_fly:
            # look for cached format existence:
            res = run_sql("SELECT value FROM bibfmt WHERE "
                          "id_bibrec=%s AND format=%s", (recID, format), 1)
        if res and record_exist_p == 1:
            # record 'recID' is formatted in 'format', so print it
            out += "%s" % decompress(res[0][0])
        else:
            # record 'recID' is not formatted in 'format' -- they are
            # not in "bibfmt" table; so fetch all the data from
            # "bibXXx" tables:
            if format == "marcxml":
                out += """    <record xmlns="http://www.loc.gov/MARC21/slim">\n"""
            else:
                # the enclosing condition guarantees format starts with "xm"
                out += """    <record>\n"""
            out += "        <controlfield tag=\"001\">%d</controlfield>\n" % int(recID)
            if record_exist_p == -1:
                # deleted record, so display only OAI ID and 980:
                oai_ids = get_fieldvalues(recID, CFG_OAI_ID_FIELD)
                if oai_ids:
                    out += "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\"><subfield code=\"%s\">%s</subfield></datafield>\n" % \
                           (CFG_OAI_ID_FIELD[0:3],
                            CFG_OAI_ID_FIELD[3:4],
                            CFG_OAI_ID_FIELD[4:5],
                            CFG_OAI_ID_FIELD[5:6],
                            oai_ids[0])
                out += "<datafield tag=\"980\" ind1=\" \" ind2=\" \"><subfield code=\"c\">DELETED</subfield></datafield>\n"
            else:
                # controlfields
                query = "SELECT b.tag,b.value,bb.field_number FROM bib00x AS b, bibrec_bib00x AS bb "\
                        "WHERE bb.id_bibrec=%s AND b.id=bb.id_bibxxx AND b.tag LIKE %s "\
                        "ORDER BY bb.field_number, b.tag ASC"
                res = run_sql(query, (recID, "00%"))
                for row in res:
                    field, value = row[0], row[1]
                    value = encode_for_xml(value)
                    out += """        <controlfield tag="%s">%s</controlfield>\n""" % \
                           (encode_for_xml(field[0:3]), value)
                # datafields: walk bib01x .. bib99x in order.  bib00x is
                # skipped because it holds the controlfields printed above.
                for table_no in range(1, 100):
                    digits = "%02d" % table_no
                    query = "SELECT b.tag,b.value,bb.field_number FROM bib%sx AS b, bibrec_bib%sx AS bb "\
                            "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\
                            "ORDER BY bb.field_number, b.tag ASC" % (digits, digits)
                    res = run_sql(query, (recID, digits + "%"))
                    # -999 means "no datafield opened yet in this table"
                    field_number_old = -999
                    field_old = ""
                    for row in res:
                        field, value, field_number = row[0], row[1], row[2]
                        ind1, ind2 = field[3], field[4]
                        if ind1 == "_" or ind1 == "":
                            ind1 = " "
                        if ind2 == "_" or ind2 == "":
                            ind2 = " "
                        # print field tag
                        if field_number != field_number_old or \
                               field[:-1] != field_old[:-1]:
                            if field_number_old != -999:
                                out += """        </datafield>\n"""
                            out += """        <datafield tag="%s" ind1="%s" ind2="%s">\n""" % \
                                   (encode_for_xml(field[0:3]),
                                    encode_for_xml(ind1),
                                    encode_for_xml(ind2))
                            field_number_old = field_number
                            field_old = field
                        # print subfield value
                        value = encode_for_xml(value)
                        out += """            <subfield code="%s">%s</subfield>\n""" % \
                               (encode_for_xml(field[-1:]), value)

                    # all fields/subfields printed in this run, so close the tag:
                    if field_number_old != -999:
                        out += """        </datafield>\n"""
            # we are at the end of printing the record:
            out += "    </record>\n"

    elif format == "xd" or format == "oai_dc":
        # XML Dublin Core format, possibly OAI -- select only some bibXXx fields:
        out += """    <dc xmlns="http://purl.org/dc/elements/1.1/"
                         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                         xsi:schemaLocation="http://purl.org/dc/elements/1.1/
                                             http://www.openarchives.org/OAI/1.1/dc.xsd">\n"""
        # a deleted record gets an empty <dc> element
        if record_exist_p != -1:
            for f in get_fieldvalues(recID, "041__a"):
                out += "        <language>%s</language>\n" % f

            for f in get_fieldvalues(recID, "100__a"):
                out += "        <creator>%s</creator>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "700__a"):
                out += "        <creator>%s</creator>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "245__a"):
                out += "        <title>%s</title>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "65017a"):
                out += "        <subject>%s</subject>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "8564_u"):
                out += "        <identifier>%s</identifier>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "520__a"):
                out += "        <description>%s</description>\n" % encode_for_xml(f)

            out += "        <date>%s</date>\n" % get_creation_date(recID)
        out += "    </dc>\n"

    # print record closing tags, if needed:
    if oai_envelope:
        out += "   </metadata>\n"
        out += "  </record>\n"

    return out
Пример #23
0
def record_get_xml(recID,
                   format='xm',
                   decompress=zlib.decompress,
                   on_the_fly=False):
    """
    Returns an XML string of the record given by recID.

    The function builds the XML directly from the database,
    without using the standard formatting process.

    'format' allows to define the flavour of XML:
        - 'xm' for standard XML
        - 'marcxml' for MARC XML
        - 'oai_dc' for OAI Dublin Core
        - 'xd' for XML Dublin Core

    If record does not exist, returns empty string.
    If the record is deleted, returns an empty MARCXML (with recid
    controlfield, OAI ID fields and 980__c=DELETED)

    All values (record ID, tag, format) are handed to run_sql as bound
    parameters instead of being interpolated into the SQL text; only
    the bibXXx table names, which cannot be bound, are formatted in.

    @param recID: the id of the record to retrieve
    @param format: the XML flavour, see above
    @param decompress: function applied to the cached value read from
        the bibfmt table
    @param on_the_fly: if false, try to fetch precreated one in database
    @return: the xml string of the record
    """
    from invenio.search_engine import record_exists

    def get_fieldvalues(recID, tag):
        """Return list of field values for field TAG inside record RECID."""
        if tag == "001___":
            # we have asked for recID that is not stored in bibXXx tables
            return [str(recID)]
        # we are going to look inside bibXXx tables
        digit = tag[0:2]
        query = "SELECT bx.value FROM bib%sx AS bx, bibrec_bib%sx AS bibx " \
                "WHERE bibx.id_bibrec=%%s AND bx.id=bibx.id_bibxxx " \
                "AND bx.tag LIKE %%s " \
                "ORDER BY bibx.field_number, bx.tag ASC" % (digit, digit)
        return [row[0] for row in run_sql(query, (recID, tag))]

    def get_creation_date(recID, fmt="%Y-%m-%d"):
        "Returns the creation date of the record 'recID'."
        out = ""
        res = run_sql(
            "SELECT DATE_FORMAT(creation_date,%s) FROM bibrec WHERE id=%s",
            (fmt, recID), 1)
        if res:
            out = res[0][0]
        return out

    def get_modification_date(recID, fmt="%Y-%m-%d"):
        "Returns the date of last modification for the record 'recID'."
        out = ""
        res = run_sql(
            "SELECT DATE_FORMAT(modification_date,%s) FROM bibrec WHERE id=%s",
            (fmt, recID), 1)
        if res:
            out = res[0][0]
        return out

    out = ""

    # sanity check:
    record_exist_p = record_exists(recID)
    if record_exist_p == 0:  # doesn't exist
        return out

    # these two flavours are wrapped in an OAI <record> envelope
    oai_envelope = (format == "marcxml" or format == "oai_dc")

    # print record opening tags, if needed:
    if oai_envelope:
        out += "  <record>\n"
        out += "   <header>\n"
        for identifier in get_fieldvalues(recID, CFG_OAI_ID_FIELD):
            out += "    <identifier>%s</identifier>\n" % identifier
        out += "    <datestamp>%s</datestamp>\n" % get_modification_date(recID)
        out += "   </header>\n"
        out += "   <metadata>\n"

    if format.startswith("xm") or format == "marcxml":
        res = None
        if not on_the_fly:
            # look for cached format existence:
            res = run_sql(
                "SELECT value FROM bibfmt WHERE "
                "id_bibrec=%s AND format=%s", (recID, format), 1)
        if res and record_exist_p == 1:
            # record 'recID' is formatted in 'format', so print it
            out += "%s" % decompress(res[0][0])
        else:
            # record 'recID' is not formatted in 'format' -- they are
            # not in "bibfmt" table; so fetch all the data from
            # "bibXXx" tables:
            if format == "marcxml":
                out += """    <record xmlns="http://www.loc.gov/MARC21/slim">\n"""
            else:
                # the enclosing condition guarantees format starts with "xm"
                out += """    <record>\n"""
            out += "        <controlfield tag=\"001\">%d</controlfield>\n" % int(
                recID)
            if record_exist_p == -1:
                # deleted record, so display only OAI ID and 980:
                oai_ids = get_fieldvalues(recID, CFG_OAI_ID_FIELD)
                if oai_ids:
                    out += "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\"><subfield code=\"%s\">%s</subfield></datafield>\n" % \
                           (CFG_OAI_ID_FIELD[0:3],
                            CFG_OAI_ID_FIELD[3:4],
                            CFG_OAI_ID_FIELD[4:5],
                            CFG_OAI_ID_FIELD[5:6],
                            oai_ids[0])
                out += "<datafield tag=\"980\" ind1=\" \" ind2=\" \"><subfield code=\"c\">DELETED</subfield></datafield>\n"
            else:
                # controlfields
                query = "SELECT b.tag,b.value,bb.field_number FROM bib00x AS b, bibrec_bib00x AS bb "\
                        "WHERE bb.id_bibrec=%s AND b.id=bb.id_bibxxx AND b.tag LIKE %s "\
                        "ORDER BY bb.field_number, b.tag ASC"
                res = run_sql(query, (recID, "00%"))
                for row in res:
                    field, value = row[0], row[1]
                    value = encode_for_xml(value)
                    out += """        <controlfield tag="%s">%s</controlfield>\n""" % \
                           (encode_for_xml(field[0:3]), value)
                # datafields: walk bib01x .. bib99x in order.  bib00x is
                # skipped because it holds the controlfields printed above.
                for table_no in range(1, 100):
                    digits = "%02d" % table_no
                    query = "SELECT b.tag,b.value,bb.field_number FROM bib%sx AS b, bibrec_bib%sx AS bb "\
                            "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\
                            "ORDER BY bb.field_number, b.tag ASC" % (digits, digits)
                    res = run_sql(query, (recID, digits + "%"))
                    # -999 means "no datafield opened yet in this table"
                    field_number_old = -999
                    field_old = ""
                    for row in res:
                        field, value, field_number = row[0], row[1], row[2]
                        ind1, ind2 = field[3], field[4]
                        if ind1 == "_" or ind1 == "":
                            ind1 = " "
                        if ind2 == "_" or ind2 == "":
                            ind2 = " "
                        # print field tag
                        if field_number != field_number_old or \
                               field[:-1] != field_old[:-1]:
                            if field_number_old != -999:
                                out += """        </datafield>\n"""
                            out += """        <datafield tag="%s" ind1="%s" ind2="%s">\n""" % \
                                   (encode_for_xml(field[0:3]),
                                    encode_for_xml(ind1),
                                    encode_for_xml(ind2))
                            field_number_old = field_number
                            field_old = field
                        # print subfield value
                        value = encode_for_xml(value)
                        out += """            <subfield code="%s">%s</subfield>\n""" % \
                               (encode_for_xml(field[-1:]), value)

                    # all fields/subfields printed in this run, so close the tag:
                    if field_number_old != -999:
                        out += """        </datafield>\n"""
            # we are at the end of printing the record:
            out += "    </record>\n"

    elif format == "xd" or format == "oai_dc":
        # XML Dublin Core format, possibly OAI -- select only some bibXXx fields:
        out += """    <dc xmlns="http://purl.org/dc/elements/1.1/"
                         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                         xsi:schemaLocation="http://purl.org/dc/elements/1.1/
                                             http://www.openarchives.org/OAI/1.1/dc.xsd">\n"""
        # a deleted record gets an empty <dc> element
        if record_exist_p != -1:
            for f in get_fieldvalues(recID, "041__a"):
                out += "        <language>%s</language>\n" % f

            for f in get_fieldvalues(recID, "100__a"):
                out += "        <creator>%s</creator>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "700__a"):
                out += "        <creator>%s</creator>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "245__a"):
                out += "        <title>%s</title>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "65017a"):
                out += "        <subject>%s</subject>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "8564_u"):
                out += "        <identifier>%s</identifier>\n" % encode_for_xml(
                    f)

            for f in get_fieldvalues(recID, "520__a"):
                out += "        <description>%s</description>\n" % encode_for_xml(
                    f)

            out += "        <date>%s</date>\n" % get_creation_date(recID)
        out += "    </dc>\n"

    # print record closing tags, if needed:
    if oai_envelope:
        out += "   </metadata>\n"
        out += "  </record>\n"

    return out
 def encode_for_marcxml(value):
     """Return str(value) passed through invenio.textutils.encode_for_xml.

     A unicode 'value' is encoded to UTF-8 before the str() conversion.
     """
     from invenio.textutils import encode_for_xml
     if not isinstance(value, unicode):
         return encode_for_xml(str(value))
     return encode_for_xml(str(value.encode('utf8')))
Пример #25
0
 def encode_for_marcxml(value):
     """Convert 'value' to a byte string and hand it to encode_for_xml.

     Unicode objects are UTF-8 encoded first; anything else goes
     through str() unchanged.
     """
     from invenio.textutils import encode_for_xml
     raw = value
     if isinstance(raw, unicode):
         raw = raw.encode('utf8')
     return encode_for_xml(str(raw))
Пример #26
0
        def encode_for_marcxml(value):
            """Return str(value) passed through invenio.textutils.encode_for_xml.

            Unicode input is encoded to UTF-8 first: under Python 2, str()
            on a unicode object containing non-ASCII characters raises
            UnicodeEncodeError.
            """
            from invenio.textutils import encode_for_xml

            if isinstance(value, unicode):
                value = value.encode('utf8')
            return encode_for_xml(str(value))
Пример #27
0
                    continue
            tempfile.tempdir = CFG_TMPSHAREDDIR
            # Move document to be uploaded to temporary folder
            tmp_file = tempfile.mktemp(prefix=identifier + "_" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_", suffix=extension)
            shutil.copy(os.path.join(folder, docfile), tmp_file)
            # Create MARC temporary file with FFT tag and call bibupload
            filename = tempfile.mktemp(prefix=identifier + '_')
            filedesc = open(filename, 'w')
            marc_content = """ <record>
                                    <controlfield tag="001">%(rec_id)s</controlfield>
                                        <datafield tag="FFT" ind1=" " ind2=" ">
                                            <subfield code="n">%(name)s</subfield>
                                            <subfield code="a">%(path)s</subfield>
                                        </datafield>
                               </record> """ % {'rec_id': rec_id,
                                                'name': encode_for_xml(identifier),
                                                'path': encode_for_xml(tmp_file),
                                                }
            filedesc.write(marc_content)
            filedesc.close()
            info[1].append(docfile)
            user = ""
            if req is not None:
                user_info = collect_user_info(req)
                user = user_info['nickname']
            if not user:
                user = "******"
            # Execute bibupload with the appropiate mode

            task_arguments = ('bibupload', user, "--" + mode, "--name=" + docfile, "--priority=" + priority)