Пример #1
0
def getwords(s):
    """Break ``s`` into a list of lowercase ASCII alphanumeric words.

    The string is decomposed (NFD) first so that the base letter of an
    accented character survives the ASCII filter below. Punctuation handling
    is delegated to ``multi_replace`` (presumably each listed character is
    turned into a space; confirm against that helper).

    Returns a list of the non-empty pieces obtained by splitting on ' '.
    """
    decomposed = normalize('NFD', s)
    lowered = multi_replace(decomposed, "-_&+():;\\[]{}.,<>/?~!@#$*", ' ').lower()
    # Built once instead of once per character.
    kept_chars = string.ascii_letters + string.digits + string.whitespace
    filtered = ''.join(filter(kept_chars.__contains__, lowered))
    # NOTE(review): only the space character separates words; other
    # whitespace that passes the filter (tab, newline, ...) stays in a word.
    return [word for word in filtered.split(' ') if word]
Пример #2
0
 def do_process(s, clean=True):
     """Return ``s`` with spaces/underscores converted as ``whitespaces`` dictates.

     When ``clean`` is true, ``s`` first goes through
     ``multi_replace(s, FS_FORBIDDEN, ' ')``. ``whitespaces`` is not a
     parameter: it is a free variable defined outside this function.
     """
     text = multi_replace(s, FS_FORBIDDEN, ' ') if clean else s
     if whitespaces == WS_SPACES_TO_UNDERSCORES:
         # Both spaces and tabs become underscores.
         return text.replace(' ', '_').replace('\t', '_')
     if whitespaces == WS_UNDERSCORES_TO_SPACES:
         return text.replace('_', ' ')
     # Any other setting leaves the (possibly cleaned) text untouched.
     return text
Пример #3
0
 def handle_item(chunk, which, letter=0):
     """Return the cleaned-up name of the first or last item of ``chunk``.

     Args:
         chunk: Sequence of objects exposing a ``name`` attribute.
         which: ``'first'`` or ``'last'``; selects which item's name is used.
         letter: When truthy, the name is cut to its first
             ``tryint(letter, None)`` characters.

     Returns:
         The selected name after ``multi_replace(name, FS_FORBIDDEN, ' ')``,
         or ``None`` when ``chunk`` is empty.

     Raises:
         ValueError: If ``which`` is neither ``'first'`` nor ``'last'``.
     """
     if not chunk:
         return None
     if which == 'first':
         result = chunk[0].name
     elif which == 'last':
         result = chunk[-1].name
     else:
         # Without this branch ``result`` would be unbound and the function
         # would fail further down with an opaque UnboundLocalError.
         raise ValueError("which must be 'first' or 'last', got %r" % (which,))
     if letter:
         result = result[:tryint(letter, None)]
     return multi_replace(result, FS_FORBIDDEN, ' ')
Пример #4
0
def value_to_db(value):
    """Convert ``value`` to the string form chosen by ``get_value_type``.

    * ``ATTR_TYPE_INT``: ``str(value)``.
    * ``ATTR_TYPE_BINARY``: each element of ``value`` rendered as zero-padded
      lowercase hex (at least two digits) and concatenated.
    * anything else: ``str`` values go through
      ``multi_replace(value, ['\\n', '\\r'], '')`` (presumably stripping line
      breaks; confirm against that helper); other values are ``str()``-ed.
    """
    kind = get_value_type(value)
    if kind == ATTR_TYPE_INT:
        return str(value)
    if kind == ATTR_TYPE_BINARY:
        return ''.join('%02x' % ord(ch) for ch in value)
    if isinstance(value, str):
        return multi_replace(value, ['\n', '\r'], '')
    return str(value)
Пример #5
0
def value_to_db(value):
    """Return the string representation of ``value`` used for storage.

    The branch taken depends on ``get_value_type(value)``: integers are
    ``str()``-ed, binary values are hex-encoded element by element, and every
    other value is either passed through
    ``multi_replace(value, ['\\n', '\\r'], '')`` (when it is a ``str``) or
    ``str()``-ed.
    """
    detected = get_value_type(value)
    if detected == ATTR_TYPE_INT:
        return str(value)
    if detected == ATTR_TYPE_BINARY:
        # '%02x' zero-pads each code to at least two lowercase hex digits.
        hex_codes = ('%02x' % ord(item) for item in value)
        return ''.join(hex_codes)
    if not isinstance(value, str):
        return str(value)
    return multi_replace(value, ['\n', '\r'], '')
Пример #6
0
def getwords(s):
    """Break ``s`` into lowercase words, keeping high (non-European) Unicode.

    The string is decomposed (NFD) so that the base letter of an accented
    character can be part of a word, then handed to ``multi_replace`` with a
    punctuation list and lowercased. Characters are kept when they are ASCII
    letters, digits or whitespace, or when their code point is above 894.

    Returns a list of the non-empty pieces obtained by splitting on " ".
    """
    # HACK: non-ASCII characters should not be ignored altogether. Anything
    # above the common European range that cannot be "sanitized" (i.e.
    # stripped of its accents, etc.) is preserved as is. The arbitrary limit
    # is ord("\u037e"), GREEK QUESTION MARK.
    highest_filtered = 894

    text = normalize("NFD", s)
    text = multi_replace(text, "-_&+():;\\[]{}.,<>/?~!@#$*", " ").lower()
    ascii_word_chars = string.ascii_letters + string.digits + string.whitespace

    def keep(char):
        return ord(char) > highest_filtered or char in ascii_word_chars

    text = "".join(filter(keep, text))
    return [token for token in text.split(" ") if token]
Пример #7
0
 def RenameNode(self, node, new_name):
     """Rename ``node`` and return the name it actually ended up with.

     ``new_name`` is passed through ``multi_replace(new_name, FS_FORBIDDEN)``
     before being stored, so the result may differ from the requested name.
     """
     sanitized = multi_replace(new_name, FS_FORBIDDEN)
     node.name = sanitized
     # Read the attribute back instead of returning ``sanitized`` directly.
     return node.name
Пример #8
0
def getwords(s):
    """Return the lowercase ASCII alphanumeric words found in ``s``.

    ``s`` is NFD-decomposed so that accented ASCII letters keep their base
    letter, run through ``multi_replace`` with a punctuation list and " " as
    the replacement, lowercased, and stripped of every character that is not
    an ASCII letter, a digit or whitespace.
    """
    cleaned = multi_replace(normalize("NFD", s), "-_&+():;\\[]{}.,<>/?~!@#$*", " ")
    cleaned = cleaned.lower()
    permitted = string.ascii_letters + string.digits + string.whitespace
    kept = [char for char in cleaned if char in permitted]
    pieces = "".join(kept).split(" ")
    # Consecutive spaces produce empty strings; drop them.
    return [piece for piece in pieces if piece]
Пример #9
0
 def RenameNode(self, node, new_name):
     """Give ``node`` a new name and report what it was actually renamed to.

     The stored name is ``multi_replace(new_name, FS_FORBIDDEN)``, which may
     not equal ``new_name``; the value returned is read back from the node.
     """
     cleaned_name = multi_replace(new_name, FS_FORBIDDEN)
     node.name = cleaned_name
     return node.name