示例#1
0
    def compute_vowel_representation(self):
        '''
        Compute a representation of the lyrics where only vowels are preserved.
        '''
        self.vow = [] # Lyrics with all but vowels removed
        self.vow_idxs = [] # Indices of the vowels in self.text list
        self.word_ends = [] # Indices of the last characters of each word
        self.words = [] # List of words in the lyrics
        self.line_idxs = []

        if len(self.language) >= 2 and self.language[:2] == 'en':
            self.text_orig = self.text
            self.text = ph.get_phonetic_transcription(self.text, output_fname=self.filename+'.ipa')
            self.word_ends_orig = []
            self.words_orig = []

        prev_space_idx = -1 # Index of the previous space char
        line_idx = 0 # Line index of the current character
        # Go through the lyrics char by char
        for i in range(len(self.text)):
            self.line_idxs.append(line_idx)
            c = self.text[i]
            c = ph.map_vow(c, self.language)
            if ph.is_vow(c, self.language):
                # Ignore double vowels
                # (in English this applies probably only to 'aa' as in 'bath'
                # which rhymes with 'trap' that has only 'a')
                if i > 0 and self.text[i-1] == c:
                    # Index of a double vowel points to the latter occurrence
                    self.vow_idxs[-1] = i
                    continue
                # TODO Diftongs should not be split (i.e. "price" should
                # not rhyme with "trap kit"). This has been fixed in BattleBot
                self.vow.append(c)
                self.vow_idxs.append(i)
            elif ph.is_space(c):
                if c in '\n':
                    line_idx += 1
                elif c in '.!?' and i < len(self.text)-1 and self.text[i+1] != '\n':
                    line_idx += 1
                # If previous char was not a space, we've encountered word end
                if len(self.vow) > 0 and not ph.is_space(self.text[i-1]):
                    # Put together the new word. Potential consonants in the 
                    # end are ignored
                    new_word = self.text[prev_space_idx+1:self.vow_idxs[-1]+1]
                    # Check that the new word contains at least one vowel
                    no_vowels = True
                    for c2 in new_word:
                        if ph.is_vow(c2, self.language):
                            no_vowels = False
                            break
                    if no_vowels:
                        prev_space_idx = i
                        continue
                    self.word_ends.append(len(self.vow)-1)
                    self.words.append(new_word)
                prev_space_idx = i

        if len(self.language) >= 2 and self.language[:2] == 'en':
            self.lines_orig = self.text_orig.split('\n')
示例#2
0
    def compute_vowel_representation(self):
        '''
        Compute a representation of the lyrics where only vowels are preserved.
        '''
        self.vow = [] # Lyrics with all but vowels removed
        self.vow_idxs = [] # Indices of the vowels in self.text list
        self.word_ends = [] # Indices of the last characters of each word
        self.words = [] # List of words in the lyrics
        self.line_idxs = []

        if len(self.language) >= 2 and self.language[:2] == 'en':
            self.text_orig = self.text
            self.text = ph.get_phonetic_transcription(self.text, output_fname=self.filename+'.ipa')
            self.word_ends_orig = []
            self.words_orig = []

        prev_space_idx = -1 # Index of the previous space char
        line_idx = 0 # Line index of the current character
        # Go through the lyrics char by char
        for i in range(len(self.text)):
            self.line_idxs.append(line_idx)
            c = self.text[i]
            c = ph.map_vow(c, self.language)
            if ph.is_vow(c, self.language):
                # Ignore double vowels
                # (in English this applies probably only to 'aa' as in 'bath'
                # which rhymes with 'trap' that has only 'a')
                if i > 0 and self.text[i-1] == c:
                    # Index of a double vowel points to the latter occurrence
                    self.vow_idxs[-1] = i
                    continue
                # TODO Diftongs should not be split (i.e. "price" should
                # not rhyme with "trap kit"). This has been fixed in BattleBot
                self.vow.append(c)
                self.vow_idxs.append(i)
            elif ph.is_space(c):
                if c in '\n':
                    line_idx += 1
                elif c in '.!?' and i < len(self.text)-1 and self.text[i+1] != '\n':
                    line_idx += 1
                # If previous char was not a space, we've encountered word end
                if len(self.vow) > 0 and not ph.is_space(self.text[i-1]):
                    # Put together the new word. Potential consonants in the 
                    # end are ignored
                    new_word = self.text[prev_space_idx+1:self.vow_idxs[-1]+1]
                    # Check that the new word contains at least one vowel
                    no_vowels = True
                    for c2 in new_word:
                        if ph.is_vow(c2, self.language):
                            no_vowels = False
                            break
                    if no_vowels:
                        prev_space_idx = i
                        continue
                    self.word_ends.append(len(self.vow)-1)
                    self.words.append(new_word)
                prev_space_idx = i

        if len(self.language) >= 2 and self.language[:2] == 'en':
            self.lines_orig = self.text_orig.split('\n')
示例#3
0
文件: feature.py 项目: q9s5c1/HAVAE
    def Get_vowel(self, rhyme_list):
        vowel_list = []
        vowel_last_index = []
        all_vowel = []
        for k in range(len(rhyme_list)):
            vowel = ''
            index = -1
            for j in range(len(rhyme_list[k])):

                c = ph.map_vow(rhyme_list[k][j], 'en-g')
                if ph.is_vow(c, 'en-g'):
                    vowel = vowel + c
                    index = j
                    all_vowel.append(c)
            vowel_list.append(vowel)
            vowel_last_index.append(index)
        return vowel_list, vowel_last_index, all_vowel