Пример #1
0
 def test_illegal_characters_washing_1_1(self):
     """textutils - washing illegal characters for XML 1.1.

     Each case pairs a raw input containing control characters with the
     text expected back from wash_for_xml(..., xml_version='1.1').
     """
     cases = (
         # backspace (0x08) followed by a tab
         (chr(8) + chr(9) + 'some chars', '\x08\tsome chars'),
         # '\b' inside a non-raw literal is a backspace character
         ('$b\bar{b}$', '$b\x08ar{b}$'),
     )
     for raw_text, expected_text in cases:
         washed_text = wash_for_xml(raw_text, xml_version='1.1')
         self.assertEqual(washed_text, expected_text)
 def test_greek_characters_washing_1_1(self):
     """textutils - washing greek characters for XML 1.1.

     Washes a passage of polytonic Greek with xml_version="1.1" and
     compares the result against the expected literal below.
     """
     source_text = """
     ἄνδρα μοι ἔννεπε, μου̂σα, πολύτροπον, ὃς μάλα πολλὰ
     πλάγχθη, ἐπεὶ Τροίης ἱερὸν πτολίεθρον ἔπερσεν:
     πολλω̂ν δ' ἀνθρώπων ἴδεν ἄστεα καὶ νόον ἔγνω,
     πολλὰ δ' ὅ γ' ἐν πόντῳ πάθεν ἄλγεα ὃν κατὰ θυμόν,
     ἀρνύμενος ἥν τε ψυχὴν καὶ νόστον ἑταίρων.
     ἀλλ' οὐδ' ὣς ἑτάρους ἐρρύσατο, ἱέμενός περ:
     αὐτω̂ν γὰρ σφετέρῃσιν ἀτασθαλίῃσιν ὄλοντο,
     νήπιοι, οἳ κατὰ βου̂ς  ̔Υπερίονος  ̓Ηελίοιο
     ἤσθιον: αὐτὰρ ὁ τοι̂σιν ἀφείλετο νόστιμον ἠ̂μαρ.
     τω̂ν ἁμόθεν γε, θεά, θύγατερ Διός, εἰπὲ καὶ ἡμι̂ν."""
     expected_text = """
     ἄνδρα μοι ἔννεπε, μου̂σα, πολύτροπον, ὃς μάλα πολλὰ
     πλάγχθη, ἐπεὶ Τροίης ἱερὸν πτολίεθρον ἔπερσεν:
     πολλω̂ν δ' ἀνθρώπων ἴδεν ἄστεα καὶ νόον ἔγνω,
     πολλὰ δ' ὅ γ' ἐν πόντῳ πάθεν ἄλγεα ὃν κατὰ θυμόν,
     ἀρνύμενος ἥν τε ψυχὴν καὶ νόστον ἑταίρων.
     ἀλλ' οὐδ' ὣς ἑτάρους ἐρρύσατο, ἱέμενός περ:
     αὐτω̂ν γὰρ σφετέρῃσιν ἀτασθαλίῃσιν ὄλοντο,
     νήπιοι, οἳ κατὰ βου̂ς  ̔Υπερίονος  ̓Ηελίοιο
     ἤσθιον: αὐτὰρ ὁ τοι̂σιν ἀφείλετο νόστιμον ἠ̂μαρ.
     τω̂ν ἁμόθεν γε, θεά, θύγατερ Διός, εἰπὲ καὶ ἡμι̂ν."""
     washed_text = wash_for_xml(source_text, xml_version="1.1")
     self.assertEqual(washed_text, expected_text)
Пример #3
0
    def assertPublicationMetadata(self, pub_id, expected_metadata):
        """
        Assert that the field values of a publication are equal to those
        given in the expected_metadata dictionary.

        The 'projects' field is skipped. Every other expected value is
        passed through wash_for_xml before comparison. For the
        'related_publications' and 'related_datasets' fields both values
        are split on newlines and blank entries are dropped; a leading
        "doi:" is additionally stripped from the expected entries.

        @param pub_id: publication id handed to OpenAIREPublication
        @param expected_metadata: dict mapping field name to expected value
        """
        def _strip_doi_prefix(entry):
            # Expected entries may carry a "doi:" prefix that is dropped
            # before they are compared with the stored ones.
            if entry.startswith("doi:"):
                return entry[4:]
            return entry

        pub = OpenAIREPublication(self.user_id, publicationid=pub_id)
        metadata = pub.metadata

        for field, expected_val in expected_metadata.items():
            if field == 'projects':
                continue
            expected_val = wash_for_xml(expected_val)
            real_val = metadata.get(field, None)
            if field in ('related_publications', 'related_datasets'):
                # Build real lists (not lazy filter/map objects) so the
                # equality check compares contents on any Python version.
                real_val = [entry for entry in real_val.split("\n") if entry]
                stripped = [_strip_doi_prefix(entry)
                            for entry in expected_val.split("\n")]
                # Filter after stripping: a bare "doi:" becomes empty.
                expected_val = [entry for entry in stripped if entry]
            self.assertEqual(
                real_val, expected_val, "Field %s: expected %s but got %s" %
                (field, expected_val, real_val))
Пример #4
0
 def test_greek_characters_washing_1_1(self):
     """textutils - washing greek characters for XML 1.1.

     Washes a passage of polytonic Greek with xml_version='1.1' and
     compares the result against the expected literal below.
     """
     greek_input = '''
     ἄνδρα μοι ἔννεπε, μου̂σα, πολύτροπον, ὃς μάλα πολλὰ
     πλάγχθη, ἐπεὶ Τροίης ἱερὸν πτολίεθρον ἔπερσεν:
     πολλω̂ν δ' ἀνθρώπων ἴδεν ἄστεα καὶ νόον ἔγνω,
     πολλὰ δ' ὅ γ' ἐν πόντῳ πάθεν ἄλγεα ὃν κατὰ θυμόν,
     ἀρνύμενος ἥν τε ψυχὴν καὶ νόστον ἑταίρων.
     ἀλλ' οὐδ' ὣς ἑτάρους ἐρρύσατο, ἱέμενός περ:
     αὐτω̂ν γὰρ σφετέρῃσιν ἀτασθαλίῃσιν ὄλοντο,
     νήπιοι, οἳ κατὰ βου̂ς  ̔Υπερίονος  ̓Ηελίοιο
     ἤσθιον: αὐτὰρ ὁ τοι̂σιν ἀφείλετο νόστιμον ἠ̂μαρ.
     τω̂ν ἁμόθεν γε, θεά, θύγατερ Διός, εἰπὲ καὶ ἡμι̂ν.'''
     greek_expected = '''
     ἄνδρα μοι ἔννεπε, μου̂σα, πολύτροπον, ὃς μάλα πολλὰ
     πλάγχθη, ἐπεὶ Τροίης ἱερὸν πτολίεθρον ἔπερσεν:
     πολλω̂ν δ' ἀνθρώπων ἴδεν ἄστεα καὶ νόον ἔγνω,
     πολλὰ δ' ὅ γ' ἐν πόντῳ πάθεν ἄλγεα ὃν κατὰ θυμόν,
     ἀρνύμενος ἥν τε ψυχὴν καὶ νόστον ἑταίρων.
     ἀλλ' οὐδ' ὣς ἑτάρους ἐρρύσατο, ἱέμενός περ:
     αὐτω̂ν γὰρ σφετέρῃσιν ἀτασθαλίῃσιν ὄλοντο,
     νήπιοι, οἳ κατὰ βου̂ς  ̔Υπερίονος  ̓Ηελίοιο
     ἤσθιον: αὐτὰρ ὁ τοι̂σιν ἀφείλετο νόστιμον ἠ̂μαρ.
     τω̂ν ἁμόθεν γε, θεά, θύγατερ Διός, εἰπὲ καὶ ἡμι̂ν.'''
     greek_washed = wash_for_xml(greek_input, xml_version='1.1')
     self.assertEqual(greek_washed, greek_expected)
    def assertPublicationMetadata(self, pub_id, expected_metadata):
        """
        Assert that the field values of a publication are equal to those
        given in the expected_metadata dictionary.

        'projects' is not checked. All other expected values go through
        wash_for_xml first. 'related_publications' and 'related_datasets'
        are compared as lists of non-blank lines, with a leading "doi:"
        removed from each expected line.

        @param pub_id: publication id handed to OpenAIREPublication
        @param expected_metadata: dict mapping field name to expected value
        """
        multi_line_fields = ('related_publications', 'related_datasets')

        def _without_doi(line):
            # Drop the "doi:" prefix from an expected entry, if present.
            return line[4:] if line.startswith("doi:") else line

        pub = OpenAIREPublication(self.user_id, publicationid=pub_id)
        metadata = pub.metadata

        for field, expected_val in expected_metadata.items():
            if field == 'projects':
                continue
            expected_val = wash_for_xml(expected_val)
            real_val = metadata.get(field, None)
            if field in multi_line_fields:
                # Materialise lists so assertEqual compares contents; lazy
                # filter/map objects never compare equal on Python 3.
                real_val = [line for line in real_val.split("\n") if line]
                expected_lines = [_without_doi(line)
                                  for line in expected_val.split("\n")]
                # Blank filtering happens after the prefix is removed.
                expected_val = [line for line in expected_lines if line]
            self.assertEqual(real_val, expected_val, "Field %s: expected %s but got %s" % (field, expected_val, real_val))
 def test_russian_characters_washing_1_1(self):
     """textutils - washing russian characters for XML 1.1.

     Washes a passage of Cyrillic text with xml_version="1.1" and
     compares the result against the expected literal below.
     """
     source_text = """
     В тени дерев, над чистыми водами
     Дерновый холм вы видите ль, друзья?
     Чуть слышно там плескает в брег струя;
     Чуть ветерок там дышит меж листами;
     На ветвях лира и венец...
     Увы! друзья, сей холм - могила;
     Здесь прах певца земля сокрыла;
     Бедный певец!"""
     expected_text = """
     В тени дерев, над чистыми водами
     Дерновый холм вы видите ль, друзья?
     Чуть слышно там плескает в брег струя;
     Чуть ветерок там дышит меж листами;
     На ветвях лира и венец...
     Увы! друзья, сей холм - могила;
     Здесь прах певца земля сокрыла;
     Бедный певец!"""
     self.assertEqual(
         wash_for_xml(source_text, xml_version="1.1"),
         expected_text,
     )
Пример #7
0
def save_xml_record(recid,
                    uid,
                    xml_record='',
                    to_upload=True,
                    to_merge=False,
                    spec_name=''):
    """Write XML record to file. Default behaviour is to read the record from
    a BibEdit cache file, filter out the unchanged volatile subfields,
    write it back to an XML file and then pass this file to BibUpload.

    @param recid: record id, used to locate the cache file and name the output
    @param uid: user id, used to locate the cache file and name the output
    @param xml_record: give XML as string instead of reading cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use
    @param spec_name: when non-empty, forwarded to BibUpload with '-N'
    @return: True

    """
    if not xml_record:
        # Read record from cache file.
        cache = get_cache_file_contents(recid, uid)
        if cache:
            record = cache[2]
            used_changes = cache[4]
            xml_record = record_xml_output(record)
            delete_cache_file(recid, uid)
            delete_disabled_changes(used_changes)
        # NOTE(review): when the cache is empty `record` is never bound and
        # the code below fails with a NameError -- confirm callers guarantee
        # a cache whenever xml_record is not supplied.
    else:
        record = create_record(xml_record)[0]

    # Clean the record from unfilled volatile fields; empty fields are
    # stripped for every record once the volatile subfields are gone.
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)

    # Subfields are deliberately NOT reordered before saving
    # (TP: not wanted): record_order_subfields(record) stays disabled.

    xml_to_write = wash_for_xml(record_xml_output(record))

    # Write XML file.
    if not to_merge:
        file_path = '%s.xml' % _get_file_path(recid, uid)
    else:
        file_path = '%s_%s.xml' % (_get_file_path(
            recid, uid), CFG_BIBEDIT_TO_MERGE_SUFFIX)
    # The context manager closes the file even when write() raises.
    with open(file_path, 'w') as xml_file:
        xml_file.write(xml_to_write)

    user_name = get_user_info(uid)[1]
    if to_upload:
        # Pass XML file to BibUpload.
        upload_args = ['bibupload', 'bibedit', '-P', '5', '-r',
                       file_path, '-u', user_name]
        # TP: only add the spec name when one was given.
        if spec_name != '':
            upload_args.extend(['-N', spec_name])
        task_low_level_submission(*upload_args)
    return True
Пример #8
0
def save_xml_record(recid, uid, xml_record='', to_upload=True, to_merge=False,
                    task_name="bibedit", sequence_id=None):
    """Write XML record to file. Default behaviour is to read the record from
    a BibEdit cache file, filter out the unchanged volatile subfields,
    write it back to an XML file and then pass this file to BibUpload.

    @param recid: record id, used to locate the cache and name the output
    @param uid: user id, used to locate the cache and name the output
    @param xml_record: give XML as string instead of reading cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use
    @param task_name: when equal to "bibedit", '--name bibedit' is added to
        the BibUpload arguments
    @param sequence_id: when truthy, forwarded to BibUpload with '-I'
    @return: True

    """
    if not xml_record:
        # Read record from cache file.
        cache = get_cache_contents(recid, uid)
        if cache:
            record = cache[2]
            used_changes = cache[4]
            xml_record = record_xml_output(record)
            delete_cache(recid, uid)
            delete_disabled_changes(used_changes)
        # NOTE(review): when the cache is empty `record` is never bound and
        # the code below fails with a NameError -- confirm callers guarantee
        # a cache whenever xml_record is not supplied.
    else:
        record = create_record(xml_record)[0]

    # clean the record from unfilled volatile fields
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)

    # order subfields alphabetically before saving the record
    record_order_subfields(record)

    xml_to_write = wash_for_xml(record_xml_output(record))

    # Write XML file. In both branches the context manager closes the
    # handle even if write() raises.
    if not to_merge:
        # Uniquely named file inside the BibEdit cache directory.
        fd, file_path = tempfile.mkstemp(dir=CFG_BIBEDIT_CACHEDIR,
                                         prefix="%s_" % CFG_BIBEDIT_FILENAME,
                                         suffix="_%s_%s.xml" % (recid, uid))
        xml_file = os.fdopen(fd, 'w')
    else:
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                   CFG_BIBEDIT_TO_MERGE_SUFFIX)
        xml_file = open(file_path, 'w')
    with xml_file:
        xml_file.write(xml_to_write)

    user_name = get_user_info(uid)[1]
    if to_upload:
        args = ['bibupload', user_name, '-P', '5', '-r',
                file_path, '-u', user_name]
        if task_name == "bibedit":
            args.extend(['--name', 'bibedit'])
        if sequence_id:
            args.extend(["-I", sequence_id])
        args.extend(['--email-logs-on-error'])
        task_low_level_submission(*args)
    return True
Пример #9
0
def Make_Record(parameters, curdir, form, user_info=None):
    """
    This function creates the record file formatted for a direct
    insertion in the documents database. It uses the BibConvert tool.  The
    main difference between all the Make_..._Record functions are the
    parameters.

    As its name does not say :), this particular function should be
    used for the submission of a document.

       * createTemplate: Name of bibconvert's configuration file used
                         for creating the mysql record.

       * sourceTemplate: Name of bibconvert's source file.

    The bibconvert output is written to "<curdir>/recmysql", washed and
    escaped, and written back to the same file.

    @return: (string) - Empty string.
    @raise InvenioWebSubmitFunctionError: if recmysql does not exist after
        running bibconvert, or if it is empty.
    """
    # Get rid of "invisible" white spaces
    source = parameters['sourceTemplate'].replace(" ", "")
    create = parameters['createTemplate'].replace(" ", "")
    recmysql_path = "%s/recmysql" % curdir
    # We use bibconvert to create the xml record.
    # NOTE(review): curdir and the template names are interpolated into a
    # shell command line; confirm they can never contain quotes or other
    # shell metacharacters.
    call_uploader_txt = "%s/bibconvert -l1 -d'%s'  -Cs'%s/%s' -Ct'%s/%s' > %s/recmysql" % (
        CFG_BINDIR, curdir, CFG_WEBSUBMIT_BIBCONVERTCONFIGDIR, source,
        CFG_WEBSUBMIT_BIBCONVERTCONFIGDIR, create, curdir)
    os.system(call_uploader_txt)
    # Then we have to format this record (turn & into &amp; and < into &lt;).
    # After all we know nothing about the text entered by the users at
    # submission time.
    if not os.path.exists(recmysql_path):
        raise InvenioWebSubmitFunctionError("Cannot create database record")
    with open(recmysql_path, "r") as fp:
        rectext = fp.read()

    if not rectext:
        raise InvenioWebSubmitFunctionError("Empty record!")

    # Wash the text with wash_for_xml (presumably drops characters that are
    # not valid in XML, mainly control characters -- confirm against
    # textutils); the reserved characters are escaped by hand below.
    rectext = wash_for_xml(rectext)
    # First of all the &: collapse already-escaped ampersands so that they
    # are not escaped twice, then escape every ampersand.
    rectext = rectext.replace("&amp;", "&")
    rectext = rectext.replace("&", "&amp;")
    # Then the < - More difficult! Escape all of them, then restore the
    # opening and closing forms of the MARCXML structural tags.
    rectext = rectext.replace("<", "&lt;")
    for tag in ("record", "datafield", "controlfield", "subfield"):
        rectext = rectext.replace("&lt;" + tag, "<" + tag)
        rectext = rectext.replace("&lt;/" + tag, "</" + tag)
    # Save the record back
    with open(recmysql_path, "w") as fp:
        fp.write(rectext)
    return ""
Пример #10
0
def Make_Record(parameters, curdir, form, user_info=None):
    """
    This function creates the record file formatted for a direct
    insertion in the documents database. It uses the BibConvert tool.  The
    main difference between all the Make_..._Record functions are the
    parameters.

    As its name does not say :), this particular function should be
    used for the submission of a document.

       * createTemplate: Name of bibconvert's configuration file used
                         for creating the mysql record.

       * sourceTemplate: Name of bibconvert's source file.

    The file "<curdir>/recmysql" produced by bibconvert is read, washed,
    escaped and then overwritten with the result.

    @return: (string) - Empty string.
    @raise InvenioWebSubmitFunctionError: if recmysql is missing after
        bibconvert ran, or if its content is empty.
    """
    # MARCXML elements whose angle brackets must survive the escaping.
    structural_tags = ("record", "datafield", "controlfield", "subfield")

    # Get rid of "invisible" white spaces
    source = parameters['sourceTemplate'].replace(" ", "")
    create = parameters['createTemplate'].replace(" ", "")
    record_file = "%s/recmysql" % curdir
    # We use bibconvert to create the xml record.
    # NOTE(review): the command line is assembled by string formatting and
    # run through the shell; verify curdir and the template names cannot
    # carry quotes or shell metacharacters.
    call_uploader_txt = "%s/bibconvert -l1 -d'%s'  -Cs'%s/%s' -Ct'%s/%s' > %s/recmysql" % (
        CFG_BINDIR, curdir,
        CFG_WEBSUBMIT_BIBCONVERTCONFIGDIR, source,
        CFG_WEBSUBMIT_BIBCONVERTCONFIGDIR, create,
        curdir)
    os.system(call_uploader_txt)
    # Then we have to format this record (turn & into &amp; and < into &lt;).
    # After all we know nothing about the text entered by the users at
    # submission time.
    if not os.path.exists(record_file):
        raise InvenioWebSubmitFunctionError("Cannot create database record")
    with open(record_file, "r") as fp:
        rectext = fp.read()

    if not rectext:
        raise InvenioWebSubmitFunctionError("Empty record!")

    # Wash the text with wash_for_xml (presumably removes characters XML
    # does not support, mainly control characters -- confirm against
    # textutils); reserved characters are escaped explicitly below.
    rectext = wash_for_xml(rectext)
    # First of all the &: undo existing escapes so nothing is escaped
    # twice, then escape every ampersand.
    rectext = rectext.replace("&amp;", "&").replace("&", "&amp;")
    # Then the < - More difficult! Everything is escaped first and the
    # structural tags are switched back afterwards.
    rectext = rectext.replace("<", "&lt;")
    for tag in structural_tags:
        for slash in ("", "/"):
            rectext = rectext.replace("&lt;" + slash + tag,
                                      "<" + slash + tag)
    # Save the record back
    with open(record_file, "w") as fp:
        fp.write(rectext)
    return ""
Пример #11
0
def save_xml_record(recid, uid, xml_record='', to_upload=True, to_merge=False, spec_name=''):
    """Write XML record to file. Default behaviour is to read the record from
    a BibEdit cache file, filter out the unchanged volatile subfields,
    write it back to an XML file and then pass this file to BibUpload.

    @param recid: record id, used to locate the cache file and name the output
    @param uid: user id, used to locate the cache file and name the output
    @param xml_record: give XML as string instead of reading cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use
    @param spec_name: when non-empty, forwarded to BibUpload with '-N'
    @return: True

    """
    if not xml_record:
        # Read record from cache file.
        cache = get_cache_file_contents(recid, uid)
        if cache:
            record = cache[2]
            used_changes = cache[4]
            xml_record = record_xml_output(record)
            delete_cache_file(recid, uid)
            delete_disabled_changes(used_changes)
        # NOTE(review): with an empty cache `record` stays unbound and the
        # stripping calls below raise a NameError -- confirm that callers
        # always have a cache when no xml_record is passed.
    else:
        record = create_record(xml_record)[0]

    # Clean the record from unfilled volatile fields, then strip the empty
    # fields (done for every record after the volatile subfields are gone).
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)

    # Alphabetical subfield ordering is intentionally switched off here
    # (TP: not wanted): record_order_subfields(record) is not called.

    xml_to_write = wash_for_xml(record_xml_output(record))

    # Write XML file.
    base_path = _get_file_path(recid, uid)
    if to_merge:
        file_path = '%s_%s.xml' % (base_path, CFG_BIBEDIT_TO_MERGE_SUFFIX)
    else:
        file_path = '%s.xml' % base_path
    # `with` guarantees the handle is closed even if write() fails.
    with open(file_path, 'w') as xml_file:
        xml_file.write(xml_to_write)

    user_name = get_user_info(uid)[1]
    if to_upload:
        # Pass XML file to BibUpload.
        submission = ['bibupload', 'bibedit', '-P', '5', '-r',
                      file_path, '-u', user_name]
        # TP: the spec name is only added when one was supplied.
        if spec_name != '':
            submission += ['-N', spec_name]
        task_low_level_submission(*submission)
    return True
Пример #12
0
 def test_chinese_characters_washing_1_1(self):
     """textutils - washing chinese characters for XML 1.1.

     Washes a short CJK passage (the sample also contains Japanese kana)
     with xml_version='1.1' and compares it with the expected literal.
     """
     cjk_input = '''
     春眠暁を覚えず
     処処に啼鳥と聞く
     夜来風雨の声
     花落つること
     知んぬ多少ぞ'''
     cjk_expected = '''
     春眠暁を覚えず
     処処に啼鳥と聞く
     夜来風雨の声
     花落つること
     知んぬ多少ぞ'''
     cjk_washed = wash_for_xml(cjk_input, xml_version='1.1')
     self.assertEqual(cjk_washed, cjk_expected)
Пример #13
0
def washXMLcontrols(fieldtext):
    """First wash input with invenio.textutils.wash_for_xml()

    Then turn XML controls into respective characters
    i.e. turn &amp; into & , &lt; into < , &gt; into > and  &quot; into "

    Escaping is done afterwards in MarcXMLDocument (xml.dom.minidom)

    @param fieldtext: text that may contain the four entities above
    @return: the washed text with those entities replaced by their
        characters; each entity is unescaped exactly once
    """

    fieldtext = wash_for_xml(fieldtext)

    fieldtext = fieldtext.replace("&lt;", "<")
    fieldtext = fieldtext.replace("&gt;", ">")
    fieldtext = fieldtext.replace("&quot;", '"')
    # The ampersand must be handled last: doing it first would turn an
    # escaped entity such as "&amp;lt;" into "&lt;" and then into "<",
    # i.e. unescape the same text twice.
    fieldtext = fieldtext.replace("&amp;", "&")

    return fieldtext
 def test_chinese_characters_washing_1_0(self):
     """textutils - washing chinese characters for XML 1.0.

     Washes a short CJK passage (the sample also contains Japanese kana)
     with the default wash_for_xml arguments and compares it with the
     expected literal.
     """
     cjk_input = """
     春眠暁を覚えず
     処処に啼鳥と聞く
     夜来風雨の声
     花落つること
     知んぬ多少ぞ"""
     cjk_expected = """
     春眠暁を覚えず
     処処に啼鳥と聞く
     夜来風雨の声
     花落つること
     知んぬ多少ぞ"""
     cjk_washed = wash_for_xml(cjk_input)
     self.assertEqual(cjk_washed, cjk_expected)
Пример #15
0
def wash_tag_silent(tag_name):
    """
    Whitespace and character cleanup.

    A unicode tag is first encoded to a UTF-8 byte string; the value is
    then passed through wash_for_xml and finally through
    _apply_replacements with COMPILED_REPLACEMENTS_SILENT.

    @param tag_name: Single tag.
    @return: None when tag_name is None, otherwise the washed tag with the
    silent replacements applied.

    NOTE(review): the description and examples below refer to
    `_tag_cleanup` and to whitespace normalisation / a Unicode result;
    whether this function behaves the same way depends on the configured
    replacements -- confirm before relying on them.

    Tag Unicode string with all whitespace characters replaced with
    Unicode single space (' '), no whitespace at the start and end of the tags,
    no duplicate whitespace, and only characters valid in XML 1.0.
    Also applies list of replacements from CFG_WEBTAG_REPLACEMENTS_SILENT.

    Examples:
    >>> print(_tag_cleanup('Well formatted string: Should not be changed'))
    Well formatted string: Should not be changed
    >>> print(_tag_cleanup('double  space  characters'))
    double space characters
    >>> print(_tag_cleanup('All\\tthe\\ndifferent\\x0bwhitespace\\x0cin\\rone go'))
    All the different whitespace in one go
    >>> print(_tag_cleanup(' Preceding whitespace'))
    Preceding whitespace
    >>> print(_tag_cleanup('Trailing whitespace '))
    Trailing whitespace
    >>> print(_tag_cleanup('  Preceding and trailing double whitespace  '))
    Preceding and trailing double whitespace
    >>> _tag_cleanup(unichr(CFG_WEBTAG_LAST_MYSQL_CHARACTER))
    u''
    >>> from string import whitespace
    >>> _tag_cleanup(whitespace)
    ''
    """
    if tag_name is None:
        return None

    # convert to string; isinstance also covers subclasses of unicode
    if isinstance(tag_name, unicode):
        tag_name = tag_name.encode('utf-8')

    # wash_for_xml
    tag_name = wash_for_xml(tag_name)

    # replacements
    tag_name = _apply_replacements(COMPILED_REPLACEMENTS_SILENT, tag_name)

    return tag_name
Пример #16
0
 def test_russian_characters_washing_1_1(self):
     """textutils - washing russian characters for XML 1.1.

     Washes a passage of Cyrillic text with xml_version='1.1' and
     compares the result against the expected literal below.
     """
     cyrillic_input = '''
     В тени дерев, над чистыми водами
     Дерновый холм вы видите ль, друзья?
     Чуть слышно там плескает в брег струя;
     Чуть ветерок там дышит меж листами;
     На ветвях лира и венец...
     Увы! друзья, сей холм - могила;
     Здесь прах певца земля сокрыла;
     Бедный певец!'''
     cyrillic_expected = '''
     В тени дерев, над чистыми водами
     Дерновый холм вы видите ль, друзья?
     Чуть слышно там плескает в брег струя;
     Чуть ветерок там дышит меж листами;
     На ветвях лира и венец...
     Увы! друзья, сей холм - могила;
     Здесь прах певца земля сокрыла;
     Бедный певец!'''
     self.assertEqual(wash_for_xml(cyrillic_input, xml_version='1.1'),
                      cyrillic_expected)
Пример #17
0
def add_record_cnum(recid, uid):
    """
    Generate a cnum for the cached record unless it already has one.

    The record is read from the BibEdit cache file of (recid, uid). When
    subfield 111__g is empty a new cnum is requested from CnumSeq, added to
    the first 111 field and the cache file is updated.

    @param recid: recid of the record under check. Used to retrieve cache file
    @type recid: int

    @param uid: id of the user. Used to retrieve cache file
    @type uid: int

    @return: None if cnum already present (or if the sequence raises
        ConferenceNoStartDateError), new cnum otherwise
    @rtype: None or string
    """
    # Import placed here to avoid circular dependency
    from invenio.sequtils_cnum import CnumSeq, ConferenceNoStartDateError

    # Everything after the first element of the cache contents.
    cache_tail = get_cache_file_contents(recid, uid)[1:]
    (record_revision, record, pending_changes,
     deactivated_hp_changes, undo_list, redo_list) = cache_tail

    record_strip_empty_volatile_subfields(record)

    # Nothing to do when the record already carries a cnum.
    if record_get_field_value(record, "111", " ", " ", "g"):
        return

    cnum_seq = CnumSeq()
    try:
        washed_xml = wash_for_xml(print_rec(record))
        new_cnum = cnum_seq.next_value(xml_record=washed_xml)
    except ConferenceNoStartDateError:
        return None

    # NOTE(review): assumes the record has at least one 111 field.
    field_add_subfield(record['111'][0], 'g', new_cnum)
    update_cache_file_contents(recid, uid, record_revision, record,
                               pending_changes, deactivated_hp_changes,
                               undo_list, redo_list)
    return new_cnum
Пример #18
0
def add_record_cnum(recid, uid):
    """
    Generate a cnum for the cached record unless it already has one.

    The record is read from the BibEdit cache of (recid, uid). When
    subfield 111__g is empty a new cnum is requested from CnumSeq, added to
    the first 111 field and the cache is updated.

    @param recid: recid of the record under check. Used to retrieve cache file
    @type recid: int

    @param uid: id of the user. Used to retrieve cache file
    @type uid: int

    @return: None if cnum already present (or if the sequence raises
        ConferenceNoStartDateError), new cnum otherwise
    @rtype: None or string
    """
    # Import placed here to avoid circular dependency
    from invenio.sequtils_cnum import CnumSeq, ConferenceNoStartDateError

    # The first element of the cache contents is not needed here.
    cached = get_cache_contents(recid, uid)[1:]
    (record_revision, record, pending_changes,
     deactivated_hp_changes, undo_list, redo_list) = cached

    record_strip_empty_volatile_subfields(record)

    # A record that already has a cnum is left untouched.
    existing_cnum = record_get_field_value(record, "111", " ", " ", "g")
    if existing_cnum:
        return

    cnum_seq = CnumSeq()
    try:
        record_xml = wash_for_xml(print_rec(record))
        new_cnum = cnum_seq.next_value(xml_record=record_xml)
    except ConferenceNoStartDateError:
        return None

    # NOTE(review): assumes the record has at least one 111 field.
    field_add_subfield(record['111'][0], 'g', new_cnum)
    update_cache_contents(recid, uid, record_revision, record,
                          pending_changes, deactivated_hp_changes,
                          undo_list, redo_list)
    return new_cnum
Пример #19
0
def Convert_RecXML_to_RecALEPH(parameters, curdir, form, user_info=None):
    """
       Function to create an ALEPH 500 MARC record from a MARC XML record.

       This function depends upon the following:

         * "recmysql" is a file that already exists in the working
            submission directory. I.e. "Make_Record" has already been called and
            the MARC XML record created.

         * "recmysql" must contain an ALEPH 500 SYS in the field "970__a". That
            is to say, the function "Allocate_ALEPH_SYS" should have been called
            and an ALEPH 500 SYS allocated to this record.
            *** NOTE: "xmlmarc2textmarc" is left to check for this in the record
                      It is run in --aleph-marc=r mode, which creates an ALEPH
                      "replace" record.

       Given the valid "recmysql" in the working submission directory, this
       function will use the "xmlmarc2textmarc" tool to convert that record into
       the ALEPH MARC record. The record will then be written into the file
       "recaleph500" in the current working submission directory.

       Before the conversion, "recmysql" is rewritten in place with its
       content passed through wash_for_xml.

       @parameters: None
       @return: (string) - Empty string.
       @raise InvenioWebSubmitFunctionError: if recmysql is not readable and
           writable, if the conversion tool writes anything to stderr, or
           if recaleph500 is not accessible afterwards.
    """
    ## If recmysql does not exist in the current working submission directory,
    ## or it is not readable, fail by raising a InvenioWebSubmitFunctionError:
    if not access("%s/recmysql" % curdir, R_OK | W_OK):
        ## FAIL - recmysql cannot be accessed:
        msg = """No recmysql in submission dir %s - """ \
              """Cannot create recaleph500!""" % curdir
        raise InvenioWebSubmitFunctionError(msg)

    ## Wash possible xml-invalid characters in recmysql. The context
    ## managers close the file even if reading or writing fails.
    recmysql_path = os.path.join(curdir, 'recmysql')
    with open(recmysql_path, 'r') as recmysql_fd:
        recmysql = recmysql_fd.read()

    recmysql = wash_for_xml(recmysql)

    with open(recmysql_path, 'w') as recmysql_fd:
        recmysql_fd.write(recmysql)

    ## Command to perform conversion of recmysql -> recaleph500:
    ## NOTE(review): curdir is interpolated into a shell command line;
    ## confirm it can never contain shell metacharacters.
    convert_cmd = \
        """%(bindir)s/xmlmarc2textmarc --aleph-marc=r %(curdir)s/recmysql > """ \
        """%(curdir)s/recaleph500""" \
        % { 'bindir' : CFG_BINDIR,
            'curdir' : curdir,
          }
    ## Perform the conversion of MARC XML record to ALEPH500 record.
    ## stdout is redirected into recaleph500 by the command itself, so only
    ## stderr is read here.
    pipe_in, pipe_out, pipe_err = os.popen3(convert_cmd)
    pipe_in.close()
    pipe_out.close()
    conversion_errors = pipe_err.readlines()
    pipe_err.close()

    ## Check that the conversion was performed without error:
    if conversion_errors:
        ## It was not possible to successfully create the ALEPH500 record, quit:
        msg = """An error was encountered when attempting to """ \
              """convert %s/recmysql into recaleph500 - stopping [%s]""" % (curdir, "".join(conversion_errors))
        raise InvenioWebSubmitFunctionError(msg)

    ## Check for presence of recaleph500 in the current
    ## working submission directory:
    if not access("%s/recaleph500" % curdir, R_OK | W_OK):
        ## Either not present, or not readable - ERROR
        msg = """An error was encountered when attempting to convert """ \
              """%s/recmysql into recaleph500. After the conversion, """ \
              """recaleph500 could not be accessed.""" % curdir
        raise InvenioWebSubmitFunctionError(msg)

    ## Everything went OK:
    return ""
Пример #20
0
        sys.stderr.write("Error: Missing MARCXML to analyse")
        print usage
        sys.exit(1)

    input_filename = args[0]

    if not os.path.exists(input_filename):
        sys.stderr.write("Please enter a valid filename for input.")
        sys.exit(1)
    if not os.path.exists(config_path):
        sys.stderr.write("Please enter a valid filename for config.")
        sys.exit(1)

    # Read and wash incoming data
    file_data = open_marc_file(input_filename)
    washed_data = wash_for_xml(wash_for_utf8(file_data))

    # Transform MARCXML to record structure
    records = create_records(washed_data)
    action_dict = read_actions_configuration_file(config_path)
    insert_records = []
    append_records = []
    correct_records = []
    holdingpen_records = []

    for rec in records:
        record = rec[0]
        if record is None:
            sys.stderr.write("Record is None: %s" % (rec[2],))
            sys.exit(1)
        # Perform various checks to determine an suitable action to be taken for
def Convert_RecXML_to_RecALEPH(parameters, curdir, form, user_info=None):
    """
       Function to create an ALEPH 500 MARC record from a MARC XML record.

       This function depends upon the following:

         * "recmysql" is a file that already exists in the working
            submission directory. I.e. "Make_Record" has already been called and
            the MARC XML record created.

         * "recmysql" must contain an ALEPH 500 SYS in the field "970__a". That
            is to say, the function "Allocate_ALEPH_SYS" should have been called
            and an ALEPH 500 SYS allocated to this record.
            *** NOTE: "xmlmarc2textmarc" is left to check for this in the record
                      It is run in --aleph-marc=r mode, which creates an ALEPH
                      "replace" record.

       Given the valid "recmysql" in the working submission directory, this
       function will first rewrite "recmysql" in place with XML-invalid
       characters washed out, and will then use the "xmlmarc2textmarc" tool to
       convert that record into the ALEPH MARC record. The record will then be
       written into the file "recaleph500" in the current working submission
       directory.

       @parameters: None
       @param curdir: (string) - the current submission's working directory.
       @return: (string) - Empty string.
       @Exceptions raised: InvenioWebSubmitFunctionError when "recmysql" is
                           not accessible, when the conversion reports
                           anything on stderr (or cannot be started), or when
                           "recaleph500" is not accessible afterwards.
    """
    ## Imported locally so that this function does not rely on the module's
    ## import section providing subprocess:
    import subprocess

    recmysql_path = os.path.join(curdir, 'recmysql')
    recaleph_path = os.path.join(curdir, 'recaleph500')

    ## If recmysql does not exist in the current working submission directory,
    ## or it is not both readable and writable (it is rewritten below), fail
    ## by raising an InvenioWebSubmitFunctionError:
    if not access(recmysql_path, R_OK|W_OK):
        ## FAIL - recmysql cannot be accessed:
        msg = """No recmysql in submission dir %s - """ \
              """Cannot create recaleph500!""" % curdir
        raise InvenioWebSubmitFunctionError(msg)

    ## Wash possible xml-invalid characters in recmysql. The "with" blocks
    ## guarantee that the file is closed even if reading/writing fails:
    with open(recmysql_path, 'r') as recmysql_fd:
        recmysql = recmysql_fd.read()

    recmysql = wash_for_xml(recmysql)

    with open(recmysql_path, 'w') as recmysql_fd:
        recmysql_fd.write(recmysql)

    ## Command to perform conversion of recmysql -> recaleph500. It is passed
    ## as an argument list (no shell), so a submission directory containing
    ## spaces or shell metacharacters cannot alter the command:
    convert_cmd = [os.path.join(CFG_BINDIR, 'xmlmarc2textmarc'),
                   '--aleph-marc=r',
                   recmysql_path]

    ## Perform the conversion of MARC XML record to ALEPH500 record. The
    ## converter's stdout goes straight into recaleph500; anything it prints
    ## on stderr is treated as a conversion error:
    try:
        with open(recaleph_path, 'w') as recaleph_fd:
            converter = subprocess.Popen(convert_cmd,
                                         stdin=subprocess.PIPE,
                                         stdout=recaleph_fd,
                                         stderr=subprocess.PIPE,
                                         universal_newlines=True)
            ## communicate() closes the converter's stdin, waits for it to
            ## finish and returns (stdout, stderr):
            conversion_errors = converter.communicate()[1]
    except (IOError, OSError) as err:
        ## Either recaleph500 could not be opened for writing, or the
        ## converter could not be started at all. Report it in the same way
        ## as an error printed by the converter itself:
        conversion_errors = str(err) or repr(err)

    ## Check that the conversion was performed without error:
    if conversion_errors:
        ## It was not possible to successfully create the ALEPH500 record, quit:
        msg = """An error was encountered when attempting to """ \
              """convert %s/recmysql into recaleph500 - stopping [%s]""" % (curdir, conversion_errors)
        raise InvenioWebSubmitFunctionError(msg)

    ## Check for presence of recaleph500 in the current
    ## working submission directory:
    if not access(recaleph_path, R_OK|W_OK):
        ## Either not present, or not readable/writable - ERROR
        msg = """An error was encountered when attempting to convert """ \
              """%s/recmysql into recaleph500. After the conversion, """ \
              """recaleph500 could not be accessed.""" % curdir
        raise InvenioWebSubmitFunctionError(msg)

    ## Everything went OK:
    return ""
Пример #22
0
    input_filename = args[0]

    if not os.path.exists(input_filename):
        sys.stderr.write("Please enter a valid filename for input.")
        sys.exit(1)
    if not os.path.exists(config_path):
        sys.stderr.write("Please enter a valid filename for config.")
        sys.exit(1)

    file_data = open_marc_file(input_filename)
    try:
        # latin1 will decode anything
        decoded_data = file_data.decode('latin1')
        # utf-8 will encode anything
        encoded_data = decoded_data.encode('utf-8')
        washed_data = wash_for_xml(encoded_data)
    except UnicodeError:
        sys.stderr.write("en/decoding failed on file: %s" % (input_filename,))
        sys.exit(3)
    # Transform MARCXML to record structure
    records = create_records(washed_data)
    action_dict = read_actions_configuration_file(config_path)
    insert_records = []
    append_records = []
    correct_records = []
    holdingpen_records = []

    for rec in records:
        record = rec[0]
        # Perform various checks to determine a suitable action to be taken for
        # that particular record. Whether it will be inserted, discarded or replacing
 def test_latin_characters_washing_1_1(self):
     """textutils - washing latin characters for XML 1.1."""
     # The washer is expected to hand accented Latin letters back unchanged
     # when asked for XML 1.1.
     latin_text = 'àèéìòùÀ'
     washed = wash_for_xml(latin_text, xml_version='1.1')
     self.assertEqual(washed, 'àèéìòùÀ')
 def test_latin_characters_washing_1_0(self):
     """textutils - washing latin characters for XML 1.0."""
     # No xml_version is passed, so the washer's default mode is exercised;
     # accented Latin letters are expected to come back unchanged.
     latin_text = 'àèéìòùÀ'
     washed = wash_for_xml(latin_text)
     self.assertEqual(washed, 'àèéìòùÀ')
Пример #25
0
 def test_latin_characters_washing_1_0(self):
     """textutils - washing latin characters for XML 1.0."""
     # Default washing mode (no xml_version given): the accented Latin
     # sample must survive byte-for-byte.
     sample = 'àèéìòùÀ'
     result = wash_for_xml(sample)
     self.assertEqual(result, 'àèéìòùÀ')
 def test_illegal_characters_washing_1_0(self):
     """textutils - washing illegal characters for XML 1.0."""
     # Default washing mode: the backspace (chr(8)) is expected to be
     # removed while the tab (chr(9)) is kept.
     control_prefixed = chr(8) + chr(9) + 'some chars'
     washed_prefix = wash_for_xml(control_prefixed)
     self.assertEqual(washed_prefix, '\tsome chars')

     # '\b' in the literal below is the backspace escape, not a backslash
     # followed by the letter b; it is expected to be removed as well.
     tex_like = '$b\bar{b}$'
     washed_tex = wash_for_xml(tex_like)
     self.assertEqual(washed_tex, '$bar{b}$')
Пример #27
0
        sys.stderr.write("Error: Missing MARCXML to analyse")
        print usage
        sys.exit(1)

    input_filename = args[0]

    if not os.path.exists(input_filename):
        sys.stderr.write("Please enter a valid filename for input.")
        sys.exit(1)
    if not os.path.exists(config_path):
        sys.stderr.write("Please enter a valid filename for config.")
        sys.exit(1)

    # Read and wash incoming data
    file_data = open_marc_file(input_filename)
    washed_data = wash_for_xml(wash_for_utf8(file_data))

    # Transform MARCXML to record structure
    records = create_records(washed_data)
    action_dict = read_actions_configuration_file(config_path)
    insert_records = []
    append_records = []
    correct_records = []
    holdingpen_records = []

    for rec in records:
        record = rec[0]
        if record is None:
            sys.stderr.write("Record is None: %s" % (rec[2], ))
            sys.exit(1)
        # Perform various checks to determine a suitable action to be taken for
def Make_Dummy_MARC_XML_Record(parameters, curdir, form, user_info=None):
    """
    Make a dummy MARC XML record and store it in a submission's working-
    directory.
    This dummy record is not intended to be inserted into the Invenio
    repository. Rather, it is intended as a way for other submission-
    related functionalities to have access to the data submitted without
    necessarily having to know the names of the files in which the
    values were stored.
    An example could be the publiline service: by using a dummy record
    in the submission's directory it would be able to access an item's
    information (e.g. title, etc) without having to know the name of the
    title file, etc.
    Another use for the dummy record could be, for example, creating a
    preview of the submitted record information with bibconvert.

    The record is produced by bibconvert, then read back, washed of
    characters unsupported in XML, escaped ("&" and "<" outside of the
    known MARC XML tags) and written back to the same file.

    @param parameters: (dictionary) - must contain:

          + dummyrec_source_tpl: (string) - the name of the bibconvert
            source template used for the creation of the dummy record.

          + dummyrec_create_tpl: (string) - the name of the bibconvert
            create template used for the creation of the dummy record.

    @param curdir: (string) - the current submission's working
                              directory.

    @param form: (dictionary) - form fields.

    @param user_info: (dictionary) - various information about the
                                     submitting user (includes the
                                     apache req object).

    @return: (string) - empty string.

    @Exceptions raised: InvenioWebSubmitFunctionError when an
                        unexpected error is encountered.
    """
    ## Get the apache request object from user_info: (we may use it for
    ## error reporting)
    try:
        req_obj = user_info['req']
    except (KeyError, TypeError):
        req_obj = None

    ## Remove any spaces from the names of the source and creation templates:
    source_tpl = parameters['dummyrec_source_tpl'].replace(" ", "")
    create_tpl = parameters['dummyrec_create_tpl'].replace(" ", "")

    ## Full path of the dummy record inside the submission directory:
    dummyrec_path = "%s/%s" % (curdir, CFG_WEBSUBMIT_DUMMY_XML_NAME)

    ## Call bibconvert to create the MARC XML record:
    ## NOTE(review): the template names and curdir are interpolated into a
    ## shell command line; this presumably relies on them coming from trusted
    ## configuration - confirm against the callers.
    cmd_bibconvert_call = "%s/bibconvert -l1 -d'%s' -Cs'%s/%s' -Ct'%s/%s' " \
                          "> %s 2>/dev/null" \
                          % (CFG_BINDIR,
                             curdir,
                             CFG_WEBSUBMIT_BIBCONVERTCONFIGDIR,
                             source_tpl,
                             CFG_WEBSUBMIT_BIBCONVERTCONFIGDIR,
                             create_tpl,
                             dummyrec_path)
    errcode_bibconvert = os.system(cmd_bibconvert_call)
    if errcode_bibconvert:
        ## There was a problem creating the dummy MARC XML record. Fail.
        err_msg = "Error: Unable to create dummy MARC XML record [%s/%s]. " \
                  "Bibconvert failed with error code [%s]." \
                  % (curdir,
                     CFG_WEBSUBMIT_DUMMY_XML_NAME,
                     errcode_bibconvert)
        raise InvenioWebSubmitFunctionError(err_msg)

    ## Bibconvert doesn't escape stuff for XML. Read the dummy record into
    ## memory, replace any "&" or "<" with "&amp;" and "&lt;", then re-write
    ## the dummy MARC XML record to the current dir:
    try:
        ## The "with" block closes the file even when read() fails:
        with open(dummyrec_path, "r") as fp_dummyrec:
            record_text = fp_dummyrec.read()
    except IOError:
        ## Couldn't read the contents of dummy_marcxml_rec.
        err_msg = "Error: Unable to create dummy MARC XML record [%s/%s]. " \
                  "Bibconvert reported no error, but the record was " \
                  "unreadable later." % (curdir, CFG_WEBSUBMIT_DUMMY_XML_NAME)
        register_exception(req=req_obj, prefix=err_msg)
        raise InvenioWebSubmitFunctionError(err_msg)

    # Escape XML-reserved chars and clean the unsupported ones (mainly
    # control characters)
    record_text = wash_for_xml(record_text)
    ## Replace the "&". Already-escaped "&amp;" is first folded back to "&"
    ## so that it does not end up double-escaped as "&amp;amp;":
    record_text = record_text.replace("&amp;", "&")
    record_text = record_text.replace("&", "&amp;")
    ## Now replace the "<":
    record_text = record_text.replace("<", "&lt;")
    ## Having replaced "<" everywhere in the record, put it back in known
    ## MARC XML tags (opening and closing form of each):
    for tag_name in ("record", "datafield", "controlfield", "subfield"):
        record_text = record_text.replace("&lt;" + tag_name, "<" + tag_name)
        record_text = record_text.replace("&lt;/" + tag_name, "</" + tag_name)

    ## Finally, re-write the dummy MARC XML record to the current submission's
    ## working directory:
    try:
        ## Leaving the "with" block flushes and closes the file:
        with open(dummyrec_path, "w") as fp_dummyrec:
            fp_dummyrec.write(record_text)
    except IOError:
        ## Unable to write the dummy MARC XML record to curdir.
        err_msg = "Error: Unable to create dummy MARC XML record [%s/%s]. " \
                  "After having escaped its data contents for XML, it could " \
                  "not be written back to the submission's working directory." \
                  % (curdir, CFG_WEBSUBMIT_DUMMY_XML_NAME)
        register_exception(req=req_obj, prefix=err_msg)
        raise InvenioWebSubmitFunctionError(err_msg)
 def test_latin_characters_washing_1_1(self):
     """textutils - washing latin characters for XML 1.1."""
     # With xml_version "1.1" the accented Latin sample is expected to be
     # returned exactly as it went in.
     sample = "àèéìòùÀ"
     result = wash_for_xml(sample, xml_version="1.1")
     self.assertEqual(result, "àèéìòùÀ")
Пример #30
0
 def test_illegal_characters_washing_1_0(self):
     """textutils - washing illegal characters for XML 1.0."""
     # No xml_version is given, so the default washing mode is used: the
     # leading backspace (chr(8)) should vanish, the tab (chr(9)) should stay.
     raw_text = chr(8) + chr(9) + 'some chars'
     cleaned_text = wash_for_xml(raw_text)
     self.assertEqual(cleaned_text, '\tsome chars')

     # The '\b' escape in this literal is a backspace character embedded in
     # TeX-looking text; it should be stripped too.
     raw_formula = '$b\bar{b}$'
     cleaned_formula = wash_for_xml(raw_formula)
     self.assertEqual(cleaned_formula, '$bar{b}$')
 def test_illegal_characters_washing_1_1(self):
     """textutils - washing illegal characters for XML 1.1."""
     # In XML 1.1 mode both the backspace (chr(8)) and the tab (chr(9)) are
     # expected to survive washing.
     control_prefixed = chr(8) + chr(9) + "some chars"
     washed_prefix = wash_for_xml(control_prefixed, xml_version="1.1")
     self.assertEqual(washed_prefix, "\x08\tsome chars")

     # "\b" in the literal is the backspace escape; it is expected to be
     # kept (as "\x08") in XML 1.1 mode.
     tex_like = "$b\bar{b}$"
     washed_tex = wash_for_xml(tex_like, xml_version="1.1")
     self.assertEqual(washed_tex, "$b\x08ar{b}$")
 def test_illegal_characters_washing_1_1(self):
     """textutils - washing illegal characters for XML 1.1."""
     # XML 1.1 mode: neither the backspace (chr(8)) nor the tab (chr(9)) is
     # expected to be removed.
     raw_text = chr(8) + chr(9) + 'some chars'
     cleaned_text = wash_for_xml(raw_text, xml_version='1.1')
     self.assertEqual(cleaned_text, '\x08\tsome chars')

     # The backspace written as '\b' inside TeX-looking text should come
     # back intact (spelled '\x08' in the expected value).
     raw_formula = '$b\bar{b}$'
     cleaned_formula = wash_for_xml(raw_formula, xml_version='1.1')
     self.assertEqual(cleaned_formula, '$b\x08ar{b}$')
Пример #33
0
 def test_latin_characters_washing_1_1(self):
     """textutils - washing latin characters for XML 1.1."""
     # Washing for XML 1.1 is expected to leave accented Latin letters as
     # they are.
     accented = 'àèéìòùÀ'
     outcome = wash_for_xml(accented, xml_version='1.1')
     self.assertEqual(outcome, 'àèéìòùÀ')
Пример #34
0
def save_xml_record(recid,
                    uid,
                    xml_record='',
                    to_upload=True,
                    to_merge=False,
                    task_name="bibedit",
                    sequence_id=None):
    """Write XML record to file. Default behaviour is to read the record from
    a BibEdit cache file, filter out the unchanged volatile subfields,
    write it back to an XML file and then pass this file to BibUpload.

    @param recid: record ID; used to locate the cache and to name the file
    @param uid: user ID; used to locate the cache and to name the file
    @param xml_record: give XML as string instead of reading cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use
    @param task_name: when equal to "bibedit", the upload task is
        submitted with '--name bibedit'
    @param sequence_id: if set, passed to the upload task with '-I'
    @return: True once the file has been written (and the upload task
        submitted, if requested); False when no xml_record was given and
        there is no cached record to save

    """
    if xml_record:
        record = create_record(xml_record)[0]
    else:
        # Read record from cache file.
        cache = get_cache_contents(recid, uid)
        if not cache:
            # Neither an explicit XML record nor a cached one: there is
            # nothing to save. (Falling through here used to fail with an
            # UnboundLocalError on 'record'.)
            return False
        record = cache[2]
        used_changes = cache[4]
        # NOTE(review): this serialisation result is not used further down
        # (the record is serialised again after cleaning); the call is kept
        # in place, before the cache is deleted, to preserve the original
        # order of operations.
        xml_record = record_xml_output(record)
        delete_cache(recid, uid)
        delete_disabled_changes(used_changes)

    # clean the record from unfilled volatile fields
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)

    # order subfields alphabetically before saving the record
    record_order_subfields(record)

    xml_to_write = wash_for_xml(record_xml_output(record))

    # Write XML file. The "with" blocks make sure the file is closed even if
    # the write fails.
    if not to_merge:
        fd, file_path = tempfile.mkstemp(dir=CFG_BIBEDIT_CACHEDIR,
                                         prefix="%s_" % CFG_BIBEDIT_FILENAME,
                                         suffix="_%s_%s.xml" % (recid, uid))
        with os.fdopen(fd, 'w') as xml_file:
            xml_file.write(xml_to_write)
    else:
        file_path = '%s_%s.xml' % (_get_file_path(
            recid, uid), CFG_BIBEDIT_TO_MERGE_SUFFIX)
        with open(file_path, 'w') as xml_file:
            xml_file.write(xml_to_write)

    user_name = get_user_info(uid)[1]
    if to_upload:
        args = [
            'bibupload', user_name, '-P', '5', '-r', file_path, '-u', user_name
        ]
        if task_name == "bibedit":
            args.extend(['--name', 'bibedit'])
        if sequence_id:
            args.extend(["-I", sequence_id])
        args.extend(['--email-logs-on-error'])
        task_low_level_submission(*args)
    return True