Python normalize_str示例，openlp.core.common.normalize_str Python示例

示例#1

0

显示文件

文件： test_common.py 项目： ipic/projecao

 def test_normalize_str_replaces_crlf_with_lf(self):
     """
     Check that normalize_str turns a CRLF pair into a single LF.
     """
     # GIVEN: text with a carriage-return/line-feed pair, and the text we expect back
     source = 'something\r\nelse'
     expected = 'something\nelse'
     # WHEN: the text is normalized
     result = normalize_str(source)
     # THEN: only the line feed is left
     assert result == expected

示例#2

0

显示文件

文件： test_common.py 项目： ipic/projecao

 def test_normalize_str_removes_null_byte(self):
     """
     Check that normalize_str drops an embedded null byte.
     """
     # GIVEN: text with a null byte in the middle, and the text we expect back
     source = 'somet\x00hing'
     expected = 'something'
     # WHEN: the text is normalized
     result = normalize_str(source)
     # THEN: the null byte is gone
     assert result == expected

示例#3

0

显示文件

文件： test_common.py 项目： ipic/projecao

 def test_normalize_str_leaves_newlines(self):
     """
     Check that normalize_str does not touch a plain LF newline.
     """
     # GIVEN: text that only contains a line feed
     source = 'something\nelse'
     # WHEN: the text is normalized
     result = normalize_str(source)
     # THEN: the text comes back as it went in
     assert result == source

示例#4

0

显示文件

文件： openoffice.py 项目： simhnna/openlp

    def process_songs_text(self, text):
        """
        Split the normalized text on form feeds and import every non-blank piece as a song.

        After each piece, a song that ``check_complete()`` accepts is finished and the defaults are
        reset; a final completeness check runs once all pieces have been handled.

        :param text: The text.
        """
        pieces = normalize_str(text).split('\f')
        self.set_defaults()
        for piece in pieces:
            stripped_piece = piece.strip()
            # Blank pieces (e.g. between consecutive form feeds) carry no song
            if not stripped_piece:
                continue
            self.process_song_text(stripped_piece)
            if self.check_complete():
                self.finish()
                self.set_defaults()
        # Flush whatever is still pending after the last piece
        if self.check_complete():
            self.finish()

示例#5

0

显示文件

 def do_import_file(self, file):
     """
     Process the OpenSong file - pass in a file-like object, not a file path.

     The XML is parsed, simple metadata elements are copied onto this importer (or passed to a handler
     method), themes are turned into topics, the lyrics are parsed line by line into verses, and the
     optional presentation order is translated into ``self.verse_order_list``. Finally ``self.finish()``
     is called; if it returns a false value an error is logged for the file.

     Returns early (after logging an error) when the XML cannot be parsed or the root element is not
     ``song``.

     :param file: The file-like object to read; ``file.name`` is used when logging errors.
     """
     self.set_defaults()
     try:
         tree = objectify.parse(file)
     except (Error, LxmlError):
         self.log_error(file.name, SongStrings.XMLSyntaxError)
         log.exception('Error parsing XML')
         return
     root = tree.getroot()
     if root.tag != 'song':
         self.log_error(file.name, str(
             translate('SongsPlugin.OpenSongImport', 'Invalid OpenSong song file. Missing song tag.')))
         return
     # Names available on the root element; used below to test whether an element is present
     fields = dir(root)
     # Maps an XML element name either to the name of an attribute on self (str) that receives the
     # text, or to a bound method that is called with the text
     decode = {
         'copyright': self.add_copyright,
         'ccli': 'ccli_number',
         'author': self.parse_author,
         'title': 'title',
         'aka': 'alternate_title',
         'hymn_number': self.parse_song_book_name_and_number,
         'user1': self.add_comment,
         'user2': self.add_comment,
         'user3': self.add_comment
     }
     for attr, fn_or_string in list(decode.items()):
         if attr in fields:
             ustring = str(root.__getattr__(attr))
             if isinstance(fn_or_string, str):
                 if attr in ['ccli']:
                     # Keep only the digits of the CCLI value; store an int, or None if there are none
                     ustring = ''.join(re.findall(r'\d+', ustring))
                     if ustring:
                         setattr(self, fn_or_string, int(ustring))
                     else:
                         setattr(self, fn_or_string, None)
                 else:
                     setattr(self, fn_or_string, ustring)
             else:
                 fn_or_string(ustring)
     # Themes look like "God: Awe/Wonder", but we just want
     # "Awe" and "Wonder".  We use a set to ensure each topic
     # is only added once, in case it is already there, which
     # is actually quite likely if the alttheme is set
     topics = set(self.topics)
     if 'theme' in fields:
         theme = str(root.theme)
         # find() returns -1 when there is no ':', so the slice then starts at 0 and keeps the whole text
         subthemes = theme[theme.find(':') + 1:].split('/')
         for topic in subthemes:
             topics.add(topic.strip())
     if 'alttheme' in fields:
         theme = str(root.alttheme)
         subthemes = theme[theme.find(':') + 1:].split('/')
         for topic in subthemes:
             topics.add(topic.strip())
     self.topics = list(topics)
     self.topics.sort()
     # data storage while importing
     # Layout: verses[verse_tag][verse_num][inst] -> list of lyric lines
     verses = {}
     # keep track of verses appearance order
     our_verse_order = []
     # default verse
     verse_tag = VerseType.tags[VerseType.Verse]
     verse_num = '1'
     # for the case where song has several sections with same marker
     inst = 1
     if 'lyrics' in fields:
         lyrics = str(root.lyrics)
     else:
         lyrics = ''
     # (column, chord) pairs taken from the most recent chord line
     # NOTE(review): chords is only reassigned when another chord line is met, so the same chords are
     # applied to every following lyric line until then - confirm this is intended
     chords = []
     for this_line in lyrics.split('\n'):
         if not this_line.strip():
             continue
         # skip this line if it is a comment
         if this_line.startswith(';'):
             continue
         # skip page and column breaks
         if this_line.startswith('---') or this_line.startswith('-!!'):
             continue
         # guitar chords marker
         if this_line.startswith('.'):
             # Find the position of the chords so they can be inserted in the lyrics
             chords = []
             # Columns are counted on the line with the leading '.' removed
             this_line = this_line[1:]
             chord = ''
             i = 0
             while i < len(this_line):
                 if this_line[i] != ' ':
                     # Start of a chord: remember its column and consume up to the next space
                     chord_pos = i
                     chord += this_line[i]
                     i += 1
                     while i < len(this_line) and this_line[i] != ' ':
                         chord += this_line[i]
                         i += 1
                     chords.append((chord_pos, chord))
                     chord = ''
                 i += 1
             continue
         # verse/chorus/etc. marker
         if this_line.startswith('['):
             # drop the square brackets
             right_bracket = this_line.find(']')
             content = this_line[1:right_bracket].lower()
             # have we got any digits? If so, verse number is everything from the digits to the end (openlp does not
             # have concept of part verses, so just ignore any non integers on the end (including floats))
             match = re.match(r'(\D*)(\d+)', content)
             if match is not None:
                 verse_tag = match.group(1)
                 verse_num = match.group(2)
             else:
                 # otherwise we assume number 1 and take the whole prefix as the verse tag
                 verse_tag = content
                 verse_num = '1'
             # An empty tag (marker made of digits only, or empty brackets) falls back to index 0
             # NOTE(review): index 0 is presumably the Verse type - confirm against VerseType
             verse_index = VerseType.from_loose_input(verse_tag) if verse_tag else 0
             verse_tag = VerseType.tags[verse_index]
             inst = 1
             # If this tag/number was already stored, continue it as a new instance rather than
             # appending to the first one
             if [verse_tag, verse_num, inst] in our_verse_order and verse_num in verses.get(verse_tag, {}):
                 inst = len(verses[verse_tag][verse_num]) + 1
             continue
         # number at start of line.. it's verse number
         # NOTE(review): only the first character is taken, so a multi-digit prefix such as "12" yields
         # verse number "1" and leaves "2" in the text - confirm this is intended
         if this_line[0].isdigit():
             verse_num = this_line[0]
             this_line = this_line[1:]
         verses.setdefault(verse_tag, {})
         verses[verse_tag].setdefault(verse_num, {})
         if inst not in verses[verse_tag][verse_num]:
             verses[verse_tag][verse_num][inst] = []
             our_verse_order.append([verse_tag, verse_num, inst])
         # If chords exists insert them
         if chords and Settings().value('songs/enable chords') and not Settings().value(
                 'songs/disable chords import'):
             # offset tracks how many characters earlier insertions have added to the line
             offset = 0
             for (column, chord) in chords:
                 this_line = '{pre}[{chord}]{post}'.format(pre=this_line[:offset + column], chord=chord,
                                                           post=this_line[offset + column:])
                 # The chord text plus the two square brackets
                 offset += len(chord) + 2
         # Tidy text and remove the ____s from extended words
         this_line = normalize_str(this_line)
         this_line = this_line.replace('_', '')
         # '||' becomes an "[---]" marker on a line of its own
         this_line = this_line.replace('||', '\n[---]\n')
         this_line = this_line.strip()
         # If the line consists solely of a '|', then just use the implicit newline
         # Otherwise, add a newline for each '|'
         if this_line == '|':
             this_line = ''
         else:
             this_line = this_line.replace('|', '\n')
         verses[verse_tag][verse_num][inst].append(this_line)
     # done parsing
     # add verses in original order
     # verse_joints maps a verse definition (tag + number) to its text; further instances of the same
     # definition are appended after an "[---]" marker line
     verse_joints = {}
     for (verse_tag, verse_num, inst) in our_verse_order:
         lines = '\n'.join(verses[verse_tag][verse_num][inst])
         # Length of the leading numeric part of verse_num
         length = 0
         while length < len(verse_num) and verse_num[length].isnumeric():
             length += 1
         verse_def = '{tag}{number}'.format(tag=verse_tag, number=verse_num[:length])
         verse_joints[verse_def] = '{verse}\n[---]\n{lines}'.format(verse=verse_joints[verse_def], lines=lines) \
             if verse_def in verse_joints else lines
     # Parsing the dictionary produces the elements in a non-intuitive order.  While it "works", it's not a
     # natural layout should the user come back to edit the song.  Instead we sort by the verse type, so that we
     # get all the verses in order (v1, v2, ...), then the chorus(es), bridge(s), pre-chorus(es) etc.  We use a
     # tuple for the key, since tuples naturally sort in this manner.
     verse_defs = sorted(verse_joints.keys(),
                         key=lambda verse_def: (VerseType.from_tag(verse_def[0]), int(verse_def[1:])))
     for verse_def in verse_defs:
         lines = verse_joints[verse_def]
         self.add_verse(lines, verse_def)
     # Make sure there is at least one (empty) verse
     if not self.verses:
         self.add_verse('')
     # figure out the presentation order, if present
     if 'presentation' in fields and root.presentation:
         order = str(root.presentation)
         # We make all the tags in the lyrics lower case, so match that here and then split into a list on the
         # whitespace.
         order = order.lower().split()
         for verse_def in order:
             # NOTE(review): group 2 keeps anything after the digits (e.g. "1a"), while the keys stored
             # in verses by the lyrics parser are digits only, so such items fail the lookup below and
             # are dropped - confirm this is intended
             match = re.match(r'(\D*)(\d+.*)', verse_def)
             if match is not None:
                 verse_tag = match.group(1)
                 verse_num = match.group(2)
                 # A bare number in the presentation order is treated as a verse
                 if not verse_tag:
                     verse_tag = VerseType.tags[VerseType.Verse]
             else:
                 # Assume it's no.1 if there are no digits
                 verse_tag = verse_def
                 verse_num = '1'
             verse_index = VerseType.from_loose_input(verse_tag)
             verse_tag = VerseType.tags[verse_index]
             verse_def = '{tag}{number}'.format(tag=verse_tag, number=verse_num)
             # Only keep order items that refer to a verse found in the lyrics
             if verse_num in verses.get(verse_tag, {}):
                 self.verse_order_list.append(verse_def)
             else:
                 log.info('Got order {order} but not in verse tags, dropping this item from presentation '
                          'order'.format(order=verse_def))
     if not self.finish():
         self.log_error(file.name)

示例#6

0

显示文件

文件： easyslides.py 项目： ipic/projecao

    def _parse_and_add_lyrics(self, song):
        """
        Process the song lyrics

        The text of ``song.Contents`` is scanned twice: a first pass counts region markers and other
        ``[...]`` section markers to decide how blank lines and regions are to be interpreted, and a
        second pass collects the lyric lines into ``verses[region][verse type][verse number][instance]``.
        The collected verses are then added with ``self.add_verse()`` in the order they first appeared,
        and ``song.Sequence`` (if present) is translated into ``self.verse_order_list``.

        ``self._success`` is set to ``False`` when the contents are missing or cannot be decoded, or
        when the sequence cannot be decoded.

        :param song: The song details
        """
        try:
            lyrics = str(song.Contents).strip()
        except UnicodeDecodeError:
            log.exception('Unicode decode error while decoding Contents')
            self._success = False
            return
        except AttributeError:
            log.exception('no Contents')
            self._success = False
            return
        lines = lyrics.split('\n')
        # we go over all lines first, to determine information,
        # which tells us how to parse verses later
        region_lines = {}
        separator_lines = 0
        for line in lines:
            line = line.strip()
            if not line:
                continue
            elif line[1:7] == 'region':
                # this is region separator, probably [region 2]
                region = self._extract_region(line)
                region_lines[region] = 1 + region_lines.get(region, 0)
            elif line[0] == '[':
                separator_lines += 1
        # if the song has separators
        separators = (separator_lines > 0)
        # the number of different regions in song - 1
        if len(region_lines) > 1:
            # The message previously ran the quoted title straight into "with"; a space was added
            log.info(
                'EasySlidesImport: the file contained a song named "{title}" '
                'with more than two regions, but only two regions are tested, '
                'encountered regions were: {keys}'.format(
                    title=self.title,
                    keys=','.join(region_lines)))
        # if the song has regions
        regions = (len(region_lines) > 0)
        # if the regions are inside verses (judged by the first region seen occurring more than once)
        regions_in_verses = (regions and
                             region_lines[list(region_lines.keys())[0]] > 1)
        MarkTypes = {
            'CHORUS': VerseType.tags[VerseType.Chorus],
            'VERSE': VerseType.tags[VerseType.Verse],
            'INTRO': VerseType.tags[VerseType.Intro],
            'ENDING': VerseType.tags[VerseType.Ending],
            'BRIDGE': VerseType.tags[VerseType.Bridge],
            'PRECHORUS': VerseType.tags[VerseType.PreChorus]
        }
        verses = {}
        # list as [region, versetype, versenum, instance]
        our_verse_order = []
        default_region = '1'
        reg = default_region
        verses[reg] = {}
        # instance differentiates occurrences of same verse tag
        vt = 'v'
        vn = '1'
        inst = 1
        for line in lines:
            line = line.strip()
            if not line:
                if separators:
                    # separators are used, so empty line means slide break
                    # inside verse
                    if self._list_has(verses, [reg, vt, vn, inst]):
                        inst += 1
                else:
                    # separators are not used, so empty line starts a new verse
                    vt = 'v'
                    # NOTE(review): vn is an int here but a str everywhere else; it is only used as a
                    # dict key and formatted into the verse tag, so both forms give the same tag text
                    vn = len(verses[reg].get(vt, {})) + 1
                    inst = 1
            elif line[0:7] == '[region':
                reg = self._extract_region(line)
                verses.setdefault(reg, {})
                if not regions_in_verses:
                    vt = 'v'
                    vn = '1'
                    inst = 1
            elif line[0] == '[':
                # this is a normal section marker
                marker = line[1:line.find(']')].upper()
                vn = '1'
                # have we got any digits?
                # If so, versenumber is everything from the digits to the end.
                # The prefix group must be lazy: a greedy '.*' swallows all but the last digit, so
                # "VERSE 12" would be split into "VERSE 1" and "2" and end up as an "other" verse.
                match = re.match(r'(.*?)(\d+.*)', marker)
                if match:
                    marker = match.group(1).strip()
                    vn = match.group(2)
                # Unknown marker names map to 'o'; a marker with no name at all is a verse
                vt = MarkTypes.get(marker, 'o') if marker else 'v'
                if regions_in_verses:
                    # NOTE(review): this assigns `region`, which is not read again; the lookups below
                    # use `reg`. Possibly `reg = default_region` was intended - kept as is, confirm.
                    region = default_region
                inst = 1
                if self._list_has(verses, [reg, vt, vn, inst]):
                    inst = len(verses[reg][vt][vn]) + 1
            else:
                # A lyric line: record first appearance and store it under the current position
                if [reg, vt, vn, inst] not in our_verse_order:
                    our_verse_order.append([reg, vt, vn, inst])
                verses[reg].setdefault(vt, {})
                verses[reg][vt].setdefault(vn, {})
                verses[reg][vt][vn].setdefault(inst, [])
                verses[reg][vt][vn][inst].append(normalize_str(line))
        # done parsing
        versetags = []
        # we use our_verse_order to ensure, we insert lyrics in the same order
        # as these appeared originally in the file
        for [reg, vt, vn, inst] in our_verse_order:
            if self._list_has(verses, [reg, vt, vn, inst]):
                # this is false, but needs user input
                versetag = '{tag}{number}'.format(tag=vt, number=vn)
                versetags.append(versetag)
                lines = '\n'.join(verses[reg][vt][vn][inst])
                self.add_verse(lines, versetag)
        # Single-letter sequence codes and the verse tags they stand for
        SeqTypes = {
            'p': 'p1',
            'q': 'p2',
            'c': 'c1',
            't': 'c2',
            'b': 'b1',
            'w': 'b2',
            'e': 'e1'
        }
        # Make use of Sequence data, determining the order of verses
        try:
            order = str(song.Sequence).strip().split(',')
            for tag in order:
                if not tag:
                    continue
                elif tag[0].isdigit():
                    # A number refers to a verse
                    tag = 'v' + tag
                elif tag.lower() in SeqTypes:
                    tag = SeqTypes[tag.lower()]
                else:
                    continue
                if tag in versetags:
                    self.verse_order_list.append(tag)
                else:
                    log.info(
                        'Got order item {tag}, which is not in versetags, dropping item from presentation '
                        'order'.format(tag=tag))
        except UnicodeDecodeError:
            log.exception('Unicode decode error while decoding Sequence')
            self._success = False
        except AttributeError:
            # No Sequence on this song: leave the verse order list as it is
            pass

示例#7

0

显示文件

 def finish(self):
     """
     Build a Song from the fields collected on this importer and save it through the manager.

     If ``check_complete()`` fails, the defaults are reset and nothing is saved.

     :return: ``False`` if the song was not complete, ``True`` once it has been saved.
     """
     if not self.check_complete():
         self.set_defaults()
         return False
     log.info('committing song {title} to database'.format(title=self.title))
     song = Song()
     song.title = self.title
     if self.import_wizard is not None:
         progress_text = WizardStrings.ImportingType.format(source=song.title)
         self.import_wizard.increment_progress_bar(progress_text)
     song.alternate_title = self.alternate_title
     # Values will be set when cleaning the song.
     song.search_title = ''
     song.search_lyrics = ''
     song.verse_order = ''
     song.song_number = self.song_number
     # Maps an unrecognised verse definition to the "other" definition that replaces it
     renamed_defs = {}
     lyrics_xml = SongXML()
     next_other_number = 1
     for (verse_def, verse_text, lang) in self.verses:
         first_letter = verse_def[0].lower()
         if first_letter in VerseType.tags:
             verse_tag = first_letter
         else:
             # Unknown verse type: file it as the next numbered "other" verse
             other_tag = VerseType.tags[VerseType.Other]
             replacement_def = '{tag}{count:d}'.format(tag=other_tag, count=next_other_number)
             renamed_defs[verse_def] = replacement_def
             next_other_number += 1
             verse_tag = other_tag
             log.info('Versetype {old} changing to {new}'.format(old=verse_def, new=replacement_def))
             verse_def = replacement_def
         verse_number = verse_def[1:]
         lyrics_xml.add_verse_to_lyrics(verse_tag, verse_number, normalize_str(verse_text), lang)
     song.lyrics = str(lyrics_xml.extract_xml(), 'utf-8')
     # Fall back to the generated order only when no explicit order was given
     if not self.verse_order_list and self.verse_order_list_generated_useful:
         self.verse_order_list = self.verse_order_list_generated
     # Apply the renames from above to the verse order
     updated_order = []
     for order_item in self.verse_order_list:
         updated_order.append(renamed_defs.get(order_item, order_item))
     self.verse_order_list = updated_order
     song.verse_order = ' '.join(self.verse_order_list)
     song.copyright = self.copyright
     song.comments = self.comments
     song.theme_name = self.theme_name
     song.ccli_number = self.ccli_number
     for author_text, author_type in self.authors:
         author = self.manager.get_object_filtered(Author, Author.display_name == author_text)
         if not author:
             # Everything before the last space is the first name, the rest is the last name
             first_names, _, surname = author_text.rpartition(' ')
             author = Author.populate(display_name=author_text, last_name=surname, first_name=first_names)
         song.add_author(author, author_type)
     if self.song_book_name:
         song_book = self.manager.get_object_filtered(Book, Book.name == self.song_book_name)
         if song_book is None:
             song_book = Book.populate(name=self.song_book_name, publisher=self.song_book_pub)
         song.add_songbook_entry(song_book, song.song_number)
     for topic_text in self.topics:
         if topic_text:
             topic = self.manager.get_object_filtered(Topic, Topic.name == topic_text)
             if topic is None:
                 topic = Topic.populate(name=topic_text)
             song.topics.append(topic)
     # We need to save the song now, before adding the media files, so that
     # we know where to save the media files to.
     clean_song(self.manager, song)
     self.manager.save_object(song)
     # Now loop through the media files, copy them to the correct location,
     # and save the song again.
     for file_path, weight in self.media_files:
         existing_media = self.manager.get_object_filtered(MediaFile, MediaFile.file_path == file_path)
         if existing_media:
             continue
         if file_path.parent:
             file_path = self.copy_media_file(song.id, file_path)
         new_media = MediaFile.populate(file_path=file_path, weight=weight)
         song.media_files.append(new_media)
     self.manager.save_object(song)
     self.set_defaults()
     return True