def filterwordswithpunctuation(str1):
    """Remove the punctuation from known punctuated words and apostrophe words.

    Two kinds of occurrences are collected: every position at which a key of
    ``wordswithpunctuation`` is found in ``str1`` (replaced by the mapped
    value), and every match of ``word_with_apos_re`` (replaced by the
    alphanumeric characters of the match).  The replacements are then applied
    from left to right.

    :param str1: the text to filter
    :return: ``str1`` with every non-overlapping occurrence replaced;
        ``str1`` itself when it contains no apostrophe or nothing matched
    """
    # Fast path: text without an apostrophe is returned untouched, before the
    # word list or the regular expression are consulted.
    if u"'" not in str1:
        return str1

    occurrences = []
    for word, replacement in six.iteritems(wordswithpunctuation):
        occurrences.extend(
            (pos, word, replacement) for pos in quote.find_all(str1, word))
    for match in word_with_apos_re.finditer(str1):
        word = match.group()
        replacement = ''.join(filter(six.text_type.isalnum, word))
        occurrences.append((match.start(), word, replacement))
    if not occurrences:
        return str1

    # Leftmost occurrence first; at the same start the longest word wins.
    occurrences.sort(key=lambda occurrence: (occurrence[0], -len(occurrence[1])))

    pieces = []
    lastpos = 0
    for pos, word, replacement in occurrences:
        if pos < lastpos:
            # This occurrence starts inside text an earlier replacement has
            # already consumed (a listed word found within a regex match).
            # Applying it as well would emit its replacement a second time.
            continue
        pieces.append(str1[lastpos:pos])
        pieces.append(replacement)
        lastpos = pos + len(word)
    pieces.append(str1[lastpos:])
    return "".join(pieces)
def unescape(line):
    """Unescape the given line.

    Quotes on either side should already have been removed.

    :param line: the text to unescape
    :return: ``line`` with each two-character escape sequence replaced by the
        result of ``unescapehandler``; ``line`` itself when it contains no
        backslash
    """
    escape_places = quote.find_all(line, u"\\")
    if not escape_places:
        return line

    # Filter escaped escapes: in a run of adjacent backslashes, a backslash
    # that directly follows a true escape is that escape's target rather than
    # the start of a new sequence.  The set keeps the "is the previous
    # character a backslash" test constant-time instead of scanning the list
    # for every position.
    backslash_positions = set(escape_places)
    true_escape = False
    true_escape_places = []
    for escape_pos in escape_places:
        if escape_pos - 1 in backslash_positions:
            true_escape = not true_escape
        else:
            true_escape = True
        if true_escape:
            true_escape_places.append(escape_pos)

    extracted = []
    lastpos = 0
    for pos in true_escape_places:
        # everything leading up to the escape
        extracted.append(line[lastpos:pos])
        # the escaped sequence (consuming 2 characters)
        extracted.append(unescapehandler(line[pos:pos + 2]))
        lastpos = pos + 2
    extracted.append(line[lastpos:])
    return u"".join(extracted)
def unescape(line):
    """Unescape the given line.

    Quotes on either side should already have been removed.

    :param line: the text to unescape
    :return: ``line`` with each two-character escape sequence replaced by the
        result of ``unescapehandler``; ``line`` itself when it contains no
        backslash
    """
    escape_places = quote.find_all(line, u"\\")
    if not escape_places:
        return line

    # Filter escaped escapes.  Membership is tested against a set so that the
    # look-behind for a preceding backslash does not rescan the position list
    # on every iteration.
    known_backslashes = set(escape_places)
    true_escape = False
    true_escape_places = []
    for escape_pos in escape_places:
        follows_backslash = (escape_pos - 1) in known_backslashes
        # A backslash right after a true escape is that escape's target; any
        # other backslash starts a new escape sequence.
        true_escape = not (follows_backslash and true_escape)
        if true_escape:
            true_escape_places.append(escape_pos)

    # Collect the fragments and join once rather than growing a string with
    # repeated concatenation.
    fragments = []
    lastpos = 0
    for pos in true_escape_places:
        # everything leading up to the escape
        fragments.append(line[lastpos:pos])
        # the escaped sequence (consuming 2 characters)
        fragments.append(unescapehandler(line[pos:pos + 2]))
        lastpos = pos + 2
    fragments.append(line[lastpos:])
    return u"".join(fragments)
def test_find_all():
    """Check quote.find_all against a table of (text, needle, positions)."""
    cases = [
        ("", "a", []),
        ("a", "b", []),
        ("a", "a", [0]),
        ("aa", "a", [0, 1]),
        ("abba", "ba", [2]),
        # check we skip the whole instance: the second, overlapping "ana"
        # must not be reported
        ("banana", "ana", [1]),
    ]
    for text, needle, expected in cases:
        assert quote.find_all(text, needle) == expected
def escapeforpo(line):
    r"""Escape a line for the PO format.

    Assumes no \n occurs in the line.

    :param line: unescaped text
    :return: ``line`` with the character at every position where a key of
        ``po_escape_map`` was found replaced by its mapped value
    """
    # Gather each position once, whichever key it was found for.
    hits = set()
    for special_key in po_escape_map:
        hits.update(quote.find_all(line, special_key))

    chunks = []
    cursor = 0
    for hit in sorted(hits):
        # The untouched text before the special character, then its escape.
        chunks.extend((line[cursor:hit], po_escape_map[line[hit:hit + 1]]))
        cursor = hit + 1
    chunks.append(line[cursor:])
    return "".join(chunks)
def escapeforpo(line):
    r"""Escape a line for the PO format.

    Assumes no \n occurs in the line.

    :param line: unescaped text
    :return: ``line`` with the character at every position where a key of
        ``po_escape_map`` was found replaced by its mapped value
    """
    special_locations = []
    for special_key in po_escape_map:
        special_locations.extend(quote.find_all(line, special_key))
    # Drop duplicate positions and visit them from left to right.
    special_locations = sorted(set(special_locations))

    # Collect the pieces and join once instead of growing a string with
    # repeated concatenation.
    escaped_parts = []
    last_location = 0
    for location in special_locations:
        escaped_parts.append(line[last_location:location])
        # The lookup uses the single character at the location.
        escaped_parts.append(po_escape_map[line[location:location + 1]])
        last_location = location + 1
    escaped_parts.append(line[last_location:])
    return "".join(escaped_parts)
def escapeforpo(line):
    r"""Escape a line for the PO format.

    Assumes no \n occurs in the line.

    :param line: unescaped text
    :return: ``line`` with the character at every position where a key of
        ``po_escape_map`` was found replaced by its mapped value
    """
    special_locations = []
    for special_key in po_escape_map:
        special_locations.extend(quote.find_all(line, special_key))
    # sorted() works on both Python 2 and 3; calling .sort() on the result of
    # dict.keys() only works where keys() returns a list (Python 2).
    special_locations = sorted(set(special_locations))

    escaped_parts = []
    last_location = 0
    for location in special_locations:
        escaped_parts.append(line[last_location:location])
        # The lookup uses the single character at the location.
        escaped_parts.append(po_escape_map[line[location:location + 1]])
        last_location = location + 1
    escaped_parts.append(line[last_location:])
    return "".join(escaped_parts)
def filterwordswithpunctuation(str1):
    """Remove the punctuation from known punctuated words and apostrophe words.

    Two kinds of occurrences are collected: every position at which a key of
    ``wordswithpunctuation`` is found in ``str1`` (replaced by the mapped
    value), and every ``word'word`` match (replaced by the alphanumeric
    characters of the match).  The replacements are then applied from left to
    right.

    :param str1: the text to filter; must be a text (unicode) string
    :return: ``str1`` with every non-overlapping occurrence replaced, or
        ``str1`` itself when nothing matched
    :raises AssertionError: if ``str1`` is not a text string
    """
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, so the
    # check does not depend on the Python 2-only ``unicode`` builtin.
    assert isinstance(str1, type(u""))

    occurrences = []
    # .items() exists on both Python 2 and 3, unlike .iteritems().
    for word, replacement in wordswithpunctuation.items():
        occurrences.extend(
            (pos, word, replacement) for pos in quote.find_all(str1, word))
    # Raw string: same pattern, without relying on "\w" being passed through
    # as an unrecognised string escape.
    for match in re.finditer(r"(?u)\w+'\w+", str1):
        word = match.group()
        # Joining explicitly yields a string on Python 3 too, where filter()
        # would return an iterator.
        replacement = u"".join(char for char in word if char.isalnum())
        occurrences.append((match.start(), word, replacement))
    if not occurrences:
        return str1

    # Leftmost occurrence first; at the same start the longest word wins.
    occurrences.sort(key=lambda occurrence: (occurrence[0], -len(occurrence[1])))

    pieces = []
    lastpos = 0
    for pos, word, replacement in occurrences:
        if pos < lastpos:
            # This occurrence starts inside text an earlier replacement has
            # already consumed (a listed word found within a regex match).
            # Applying it as well would emit its replacement a second time.
            continue
        pieces.append(str1[lastpos:pos])
        pieces.append(replacement)
        lastpos = pos + len(word)
    pieces.append(str1[lastpos:])
    return u"".join(pieces)