Пример #1
0
def filterwordswithpunctuation(str1):
    """Remove the punctuation from words that legitimately contain it.

    Two kinds of occurrence are collected: every hit of a key of
    ``wordswithpunctuation`` (replaced by the mapped value) and every match
    of ``word_with_apos_re`` (replaced by its alphanumeric characters only).
    Occurrences are applied from left to right; one that starts inside text
    already consumed by an earlier occurrence is ignored.

    :param str1: the text to filter
    :return: the filtered text, or ``str1`` itself when it contains no
        apostrophe or nothing matched
    """
    if u"'" not in str1:
        return str1
    occurrences = []
    for word, replacement in six.iteritems(wordswithpunctuation):
        occurrences.extend((pos, word, replacement)
                           for pos in quote.find_all(str1, word))
    for match in word_with_apos_re.finditer(str1):
        word = match.group()
        replacement = ''.join(filter(six.text_type.isalnum, word))
        occurrences.append((match.start(), word, replacement))
    if not occurrences:
        return str1
    occurrences.sort()
    pieces = []
    lastpos = 0
    for pos, word, replacement in occurrences:
        if pos < lastpos:
            # The two sources can report overlapping spans; applying both
            # would emit the overlapping text twice, so keep the earlier one.
            continue
        pieces.append(str1[lastpos:pos])
        pieces.append(replacement)
        lastpos = pos + len(word)
    pieces.append(str1[lastpos:])
    return "".join(pieces)
Пример #2
0
def unescape(line):
    """Unescape the given line.

    Quotes on either side should already have been removed.

    :param line: text in which a backslash introduces a two-character
        escape sequence
    :return: ``line`` with every escape sequence replaced by the result of
        ``unescapehandler``; ``line`` itself when ``quote.find_all`` reports
        no backslash
    """
    escape_places = quote.find_all(line, u"\\")
    if not escape_places:
        return line

    # A set makes the "is the previous character a backslash?" test O(1)
    # instead of a list scan per backslash.
    backslash_positions = set(escape_places)

    # filter escaped escapes: within a run of consecutive backslashes they
    # alternate between starting an escape and being the escaped character
    true_escape = False
    true_escape_places = []
    for escape_pos in escape_places:
        if escape_pos - 1 in backslash_positions:
            true_escape = not true_escape
        else:
            true_escape = True
        if true_escape:
            true_escape_places.append(escape_pos)

    extracted = []
    lastpos = 0
    for pos in true_escape_places:
        # everything leading up to the escape
        extracted.append(line[lastpos:pos])
        # the escaped sequence (consuming 2 characters)
        extracted.append(unescapehandler(line[pos:pos + 2]))
        lastpos = pos + 2

    # whatever follows the last escape sequence
    extracted.append(line[lastpos:])
    return u"".join(extracted)
Пример #3
0
def filterwordswithpunctuation(str1):
    """Goes through a list of known words that have punctuation and removes the
    punctuation from them.

    Besides the keys of ``wordswithpunctuation`` (each replaced by its mapped
    value), every match of ``word_with_apos_re`` is reduced to its
    alphanumeric characters. Replacements are applied left to right and never
    overlap: an occurrence that begins inside an already replaced span is
    dropped.

    :param str1: the text to filter
    :return: the filtered text; ``str1`` unchanged if it has no apostrophe or
        no occurrence was found
    """
    if u"'" not in str1:
        return str1

    occurrences = []
    for word, replacement in six.iteritems(wordswithpunctuation):
        for pos in quote.find_all(str1, word):
            occurrences.append((pos, word, replacement))
    for match in word_with_apos_re.finditer(str1):
        word = match.group()
        replacement = ''.join(filter(six.text_type.isalnum, word))
        occurrences.append((match.start(), word, replacement))
    if not occurrences:
        return str1

    chunks = []
    lastpos = 0
    for pos, word, replacement in sorted(occurrences):
        # Skip anything overlapping the previous replacement, otherwise the
        # shared characters would be written to the output twice.
        if pos >= lastpos:
            chunks.append(str1[lastpos:pos])
            chunks.append(replacement)
            lastpos = pos + len(word)
    chunks.append(str1[lastpos:])
    return "".join(chunks)
Пример #4
0
def unescape(line):
    """Unescape the given line.

    Quotes on either side should already have been removed.

    :param line: text in which a backslash introduces a two-character
        escape sequence
    :return: ``line`` with every escape sequence replaced by the result of
        ``unescapehandler``; ``line`` itself when ``quote.find_all`` reports
        no backslash
    """
    escape_places = quote.find_all(line, u"\\")
    if not escape_places:
        return line

    # Membership is tested once per backslash; use a set rather than
    # scanning the list each time.
    known_backslashes = set(escape_places)

    # filter escaped escapes: a backslash directly after another backslash
    # flips the state, any other backslash always starts an escape
    true_escape = False
    true_escape_places = []
    for escape_pos in escape_places:
        if escape_pos - 1 in known_backslashes:
            true_escape = not true_escape
        else:
            true_escape = True
        if true_escape:
            true_escape_places.append(escape_pos)

    # Collect the pieces and join once instead of growing a string with +=.
    extracted = []
    lastpos = 0
    for pos in true_escape_places:
        # everything leading up to the escape
        extracted.append(line[lastpos:pos])
        # the escaped sequence (consuming 2 characters)
        extracted.append(unescapehandler(line[pos:pos + 2]))
        lastpos = pos + 2

    extracted.append(line[lastpos:])
    return u"".join(extracted)
Пример #5
0
def test_find_all():
    """Check ``quote.find_all`` against a table of known results."""
    # (text, searched substring, expected start positions)
    cases = [
        ("", "a", []),
        ("a", "b", []),
        ("a", "a", [0]),
        ("aa", "a", [0, 1]),
        ("abba", "ba", [2]),
        # a hit consumes its whole span: the "ana" starting at 3 overlaps
        # the one at 1 and must not be reported
        ("banana", "ana", [1]),
    ]
    for text, substring, expected in cases:
        assert quote.find_all(text, substring) == expected
Пример #6
0
def test_find_all():
    """Exercise ``quote.find_all`` on empty, missing, repeated and
    overlapping inputs."""
    expectations = (
        # nothing to find
        (("", "a"), []),
        (("a", "b"), []),
        # single and adjacent hits
        (("a", "a"), [0]),
        (("aa", "a"), [0, 1]),
        (("abba", "ba"), [2]),
        # overlapping candidates: only the first "ana" counts, the search
        # resumes after the whole matched instance
        (("banana", "ana"), [1]),
    )
    for arguments, positions in expectations:
        assert quote.find_all(*arguments) == positions
Пример #7
0
def escapeforpo(line):
    r"""Escape a line for PO format; the line is assumed to contain no \n.

    Every position where a key of ``po_escape_map`` is found is replaced by
    the mapped value for the single character at that position.

    :param line: unescaped text
    """
    hits = []
    for key in po_escape_map:
        hits += quote.find_all(line, key)

    pieces = []
    cursor = 0
    # visit each distinct position once, from left to right
    for hit in sorted(set(hits)):
        pieces += [line[cursor:hit], po_escape_map[line[hit:hit + 1]]]
        cursor = hit + 1
    pieces.append(line[cursor:])
    return "".join(pieces)
Пример #8
0
def escapeforpo(line):
    r"""Escapes a line for po format. Assumes no \n occurs in the line.

    Each position at which a key of ``po_escape_map`` is found has its
    single character replaced by the mapped escape.

    :param line: unescaped text
    :return: the escaped text
    """
    special_locations = []
    for special_key in po_escape_map:
        special_locations.extend(quote.find_all(line, special_key))
    # de-duplicate, then process positions from left to right
    special_locations = sorted(set(special_locations))
    # gather the pieces and join once rather than growing a string with +=
    escaped_parts = []
    last_location = 0
    for location in special_locations:
        escaped_parts.append(line[last_location:location])
        escaped_parts.append(po_escape_map[line[location:location + 1]])
        last_location = location + 1
    escaped_parts.append(line[last_location:])
    return "".join(escaped_parts)
Пример #9
0
def escapeforpo(line):
    r"""Escapes a line for po format. Assumes no \n occurs in the line.

    Each position at which a key of ``po_escape_map`` is found has its
    single character replaced by the mapped escape.

    @param line: unescaped text
    @return: the escaped text
    """
    special_locations = []
    for special_key in po_escape_map:
        special_locations.extend(quote.find_all(line, special_key))
    # ``dict.keys()`` is a view without ``.sort()`` on Python 3, so build the
    # sorted, de-duplicated list in a way that works on every version.
    special_locations = sorted(set(special_locations))
    escaped_parts = []
    last_location = 0
    for location in special_locations:
        escaped_parts.append(line[last_location:location])
        escaped_parts.append(po_escape_map[line[location:location + 1]])
        last_location = location + 1
    escaped_parts.append(line[last_location:])
    return "".join(escaped_parts)
Пример #10
0
def filterwordswithpunctuation(str1):
    """Goes through a list of known words that have punctuation and removes
    the punctuation from them.

    Every hit of a key of ``wordswithpunctuation`` is replaced by its mapped
    value, and every ``word'word`` match is reduced to its alphanumeric
    characters. Replacements are applied left to right; an occurrence that
    starts inside an already replaced span is skipped.

    @param str1: the text to filter (must be ``unicode``)
    @return: the filtered text, or ``str1`` itself when nothing matched
    """
    assert isinstance(str1, unicode)
    occurrences = []
    for word, replacement in wordswithpunctuation.iteritems():
        occurrences.extend([(pos, word, replacement) for pos in quote.find_all(str1, word)])
    # raw string: ``\w`` must reach the regex engine, not the string parser
    for match in re.finditer(r"(?u)\w+'\w+", str1):
        word = match.group()
        replacement = u"".join(filter(unicode.isalnum, word))
        occurrences.append((match.start(), word, replacement))
    if not occurrences:
        return str1
    occurrences.sort()
    pieces = []
    lastpos = 0
    for pos, word, replacement in occurrences:
        if pos < lastpos:
            # Overlaps the previous replacement; applying it as well would
            # duplicate the shared characters in the output.
            continue
        pieces.append(str1[lastpos:pos])
        pieces.append(replacement)
        lastpos = pos + len(word)
    pieces.append(str1[lastpos:])
    return u"".join(pieces)