Exemplos de remove_excessive_whitespace em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: typotools

Método / Função: remove_excessive_whitespace

Exemplos em hotexamples.com: 2

remove_excessive_whitespace em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de typotools.remove_excessive_whitespace em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Relacionados

check_num_digits_moved

PyGFPEncoder

attach_volume

Provider

checkAndRenewVomsProxy

isblogger

get_default_role

SqoopImport

lock

TriplestoreRequest

Related in langs

CommentPost (PHP)

UPnP (PHP)

ClientTestClusterBase (C#)

WFEM_NegPara (C#)

ip6_route (C++)

getElapsedTime (C++)

AddFilter (Go)

SetExtension (Go)

AccessControl (Java)

Class (Java)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: chgk_composer.py Projeto: peczony/chgksuite

def parse_4s_elem(s):
    """Split a markup string into a list of typed parts.

    The string is processed in four stages:

    1. Underscores inside every ``re_url`` match are backslash-escaped so
       they are not mistaken for emphasis markers.
    2. Every ``re_perc`` match is percent-decoded (longest matches first).
    3. The text is scanned for unescaped ``_`` pairs and for ``(img``
       spans with a matching closing bracket; their boundaries become
       cut positions.
    4. The text is partitioned at those positions and each piece is
       classified.

    Args:
        s: the text to parse.

    Returns:
        A list of two-item lists ``[kind, text]``. ``kind`` is ``''`` for
        plain text, ``'em'`` for a piece delimited by underscores and
        ``'img'`` for the contents of an ``(img ...)`` span.
    """

    def find_next_unescaped(ss, index):
        """Return the index of the next ``ss[index]`` after *index*, or -1.

        A backslash makes the scan jump two positions ahead (when there
        is room) before the comparison is made.
        """
        j = index + 1
        while j < len(ss):
            if ss[j] == '\\' and j+2 < len(ss):
                j += 2
            if ss[j] == ss[index]:
                return j
            j += 1
        return -1

    # Protect underscores inside re_url matches from the emphasis scan.
    for gr in re_url.finditer(s):
        gr0 = gr.group(0)
        s = s.replace(gr0, gr0.replace('_', '\\_'))

    # Decode the longest re_perc matches first so that a shorter match
    # which is a substring of a longer one does not break the longer one.
    grs = sorted((match.group(0) for match in re_perc.finditer(s)),
                 key=len, reverse=True)
    for gr in grs:
        try:
            s = s.replace(
                gr, urllib.unquote(gr.encode('utf8')).decode('utf8'))
        except Exception:
            # Best effort: leave the sequence untouched if it cannot be
            # decoded, but record why.
            debug_print('error decoding on line {}: {}\n'
                        .format(gr, traceback.format_exc()))

    s = list(s)
    i = 0
    topart = []  # cut positions handed to partition()
    while i < len(s):
        if s[i] == '_' and (i == 0 or s[i-1] != '\\'):
            debug_print('found _ at {} of line {}'
                        .format(i, s))
            topart.append(i)
            closing = find_next_unescaped(s, i)
            if closing != -1:
                topart.append(closing + 1)
                i = closing + 2
                continue
        if (s[i] == '(' and i + len('(img') < len(s)
                and ''.join(s[i:i+len('(img')]) == '(img'):
            debug_print('img candidate')
            topart.append(i)
            closing = typotools.find_matching_closing_bracket(s, i)
            if closing is not None:
                topart.append(closing + 1)
                i = closing + 2
        # NOTE: '(sc' spans are not looked for here, so no cut positions
        # are produced for them.
        i += 1

    topart = sorted(topart)

    parts = [['', ''.join(x)] for x in partition(s, topart)]
    debug_print(pprint.pformat(parts).decode('unicode_escape'))

    for part in parts:
        if part == ['', '']:
            continue
        try:
            # Strip the emphasis delimiters. The opening delimiter is the
            # FIRST character; startswith/endswith also cope with a part
            # that becomes empty after stripping.
            if part[1].startswith('_'):
                part[1] = part[1][1:]
                part[0] = 'em'
            if part[1].endswith('_'):
                part[1] = part[1][:-1]
                part[0] = 'em'
            if len(part[1]) > 4 and part[1][:4] == '(img':
                if part[1][-1] != ')':
                    part[1] = part[1] + ')'
                part[1] = typotools.remove_excessive_whitespace(
                    part[1][4:-1])
                part[0] = 'img'
                debug_print('found img at {}'
                            .format(pprint.pformat(part[1])))
            # NOTE(review): part[1][:4] is four characters long whenever
            # len(part[1]) > 3, so it can never equal the three-character
            # '(sc' and this branch never runs. Left unchanged because the
            # scan loop above does not cut at '(sc' either -- confirm
            # whether small-caps support is meant to be enabled.
            if len(part[1]) > 3 and part[1][:4] == '(sc':
                if part[1][-1] != ')':
                    part[1] = part[1] + ')'
                part[1] = typotools.remove_excessive_whitespace(
                    part[1][3:-1])
                part[0] = 'sc'
                debug_print('found img at {}'
                            .format(pprint.pformat(part[1])))
            # Undo the backslash escaping of literal underscores.
            part[1] = part[1].replace('\\_', '_')
        except Exception:
            sys.stderr.write('Error on part {}: {}'
                             .format(pprint.pformat(part).decode('unicode_escape'),
                                     traceback.format_exc()
                                     ))

    return parts

Exemplo n.º 2

0

Exibir arquivo

Arquivo: chgksuitelib.py Projeto: peczony/chgksuite

def __init__(self, text):
    """
    Parse raw text into a list of questions and loose fields.

    Parsing rationale: every Question has two required fields: 'question'
    and the immediately following 'answer'. All the rest are optional,
    as is the order of these fields. On the other hand, everything
    except the 'question' is obligatorily marked, while the 'question' is
    optionally marked. But IF the question is not marked, 'meta' comments
    between Questions will not be parsed as 'meta' but will be merged to
    'question's.

    Parsing is done by regexes in the following steps:

    1. Identify all the fields you can, mark them with their respective
       labels, mark all the others with ''
    2. Merge fields inside Question with '' lines between them
    3. Ensure every 'answer' has a 'question'
    4. Mark all remaining '' fields as 'meta'
    5. Prettify input
    6. Pack Questions into dicts
    7. Store the resulting structure as self.structure

    Args:
        text: the raw input; it is split into lines on '\\n' / '\\r\\n'.

    NOTE(review): this listing was recovered from whitespace-collapsed
    text, so the nesting of the blocks below is a reconstruction and
    should be confirmed against the upstream file.
    """
    # Working representation: a list of [label, content] pairs.
    self.structure = []

    # 1.
    # Every non-empty line starts out unlabeled ('').
    for x in re.split(r'\r?\n',text):
        if x != '':
            self.structure.append(['',remove_excessive_whitespace(x)])

    i = 0
    st = self.structure
    while i < len(st):
        # Set of (label, match start offset) for every regex in
        # self.regexes that matches the current line.
        matching_regexes = {
            (regex, self.regexes[regex].search(st[i][1]).start(0))
            for regex in self.regexes
            if self.regexes[regex].search(st[i][1])}

        # If more than one regex matches string, split it and
        # insert into structure separately.
        if len(matching_regexes) == 1:
            st[i][0] = matching_regexes.pop()[0]
        elif len(matching_regexes) > 1:
            # Order the matches by where they start in the line.
            sorted_r = sorted(matching_regexes, key=lambda x: x[1])
            slices = []
            # Each later match owns the text from its start up to the
            # start of the following match (or the end of the line).
            for j in range(1, len(sorted_r)):
                slices.append(
                    [sorted_r[j][0],
                     st[i][1][
                         sorted_r[j][1]:
                         sorted_r[j+1][1]
                         if j+1 < len(sorted_r)
                         else len(st[i][1])]])
            # NOTE(review): every slice is inserted at i+1, so when there
            # is more than one slice they end up in reverse order.
            for slice_ in slices:
                self.structure.insert(
                    i+1, slice_)
            # The current entry keeps the first label and the text up to
            # the start of the second match.
            st[i][0] = sorted_r[0][0]
            st[i][1] = st[i][1][:sorted_r[1][1]]
        i += 1
    self.structure = st
    i = 0

    # 2.
    self.merge_y_to_x('question','answer')
    self.merge_to_x_until_nextfield('answer')
    self.merge_to_x_until_nextfield('comment')

    # 3.
    # Put a 'newquestion' placeholder before every 'answer' that is not
    # directly preceded by a question; the scan restarts after each insert.
    # NOTE(review): for i == 0 the index i-1 wraps around to the last entry.
    i = 0
    while i < len(self.structure):
        if (self.structure[i][0] == 'answer'
                and self.structure[i-1][0] not in ('question', 'newquestion')):
            self.structure.insert(i,['newquestion',''])
            i = 0
        i += 1

    # Merge an unlabeled entry into the 'newquestion' placeholder that
    # follows it, restarting the scan after each merge.
    i = 0
    while i < len(self.structure) - 1:
        if (self.structure[i][0] == ''
                and self.structure[i+1][0] == 'newquestion'):
            self.merge_to_next(i)
            # If the entry now at i matches re_number and the entry
            # before it does not, label it as a question and strip the
            # re_number match from its text.
            if (re_number.search(
                    remove_excessive_whitespace(self.structure[i][1]))
                    and not re_number.search(
                        remove_excessive_whitespace(self.structure[i-1][1]))):
                self.structure[i][0] = 'question'
                self.structure[i][1] = re_number.sub(
                    '',remove_excessive_whitespace(self.structure[i][1]))
                # NOTE(review): re_number is searched here AFTER its
                # matches were removed on the previous statement; if the
                # search returns None the resulting error is swallowed
                # by the bare except below.
                try:
                    self.structure.insert(i, ['number', int(re_number.search(
                        remove_excessive_whitespace(
                            self.structure[i][1])
                    ).group(0))])
                except:
                    pass # TODO: figure out what this means
            i = 0
        i += 1

    # Placeholders become regular questions.
    for element in self.structure:
        if element[0] == 'newquestion':
            element[0] = 'question'

    self.dirty_merge_to_x_until_nextfield('source')

    # An 'author' entry whose whole text matches re_author's pattern and
    # which is not the last entry triggers a merge of the entry after it.
    # NOTE(review): merge_to_previous is called as a bare name, unlike
    # the other merge helpers in this method, which are invoked on self --
    # confirm that a module-level merge_to_previous exists.
    for id, element in enumerate(self.structure):
        if (element[0] == 'author' and re.search(r'^{}$'.format(re_author.
                pattern),
                remove_excessive_whitespace(element[1])) and
                id + 1 < len(self.structure)):
            merge_to_previous(id+1)

    self.merge_to_x_until_nextfield('accept')
    self.merge_to_x_until_nextfield('reject')

    # 4.
    # Drop entries that are unlabeled and empty after whitespace cleanup.
    self.structure = [x for x in self.structure
                      if [x[0], remove_excessive_whitespace(x[1])] != ['', '']]

    if self.structure[0][0] == '' and re_number.search(
            remove_excessive_whitespace(self.structure[0][1])):
        self.merge_to_next(0)

    # NOTE(review): the loop below inserts into self.structure while
    # enumerating it; after an insert at `id` the current element moves
    # one slot to the right and is visited again on the next iteration.
    for id, element in enumerate(self.structure):
        if element[0] == '':
            element[0] = 'meta'
        if (element[0] in self.regexes
                and element[0] not in ['tour', 'tourrev']):
            if element[0] == 'question':
                # Record the first group of the re_question match as a
                # separate 'number' entry; any failure is ignored.
                try:
                    num = re_question.search(element[1]).group(1)
                    self.structure.insert(id, ['number', num])
                except:
                    pass
            # Strip the field's own label regex match from its text.
            element[1] = self.regexes[element[0]].sub('', element[1])

    # 5.
    # NOTE(review): same insert-while-enumerating pattern as in step 4.
    for id, element in enumerate(self.structure):

        # typogrify
        if element[0] != 'date':
            element[1] = recursive_typography(element[1])

        # remove question numbers
        if element[0] == 'question':
            try:
                num = re_question.search(element[1]).group(1)
                self.structure.insert(id, ['number', num])
            except:
                pass
            element[1] = re_number.sub('', element[1])

        # detect inner lists
        # Candidates are numbers followed by '.' or ')' that sit at the
        # start of the text or after whitespace.
        mo = {m for m in re.finditer(r'(\s+|^)(\d+)[\.\)]\s*(?!\d)',
                                     element[1], re.U)}
        if len(mo) > 1:
            sorted_up = sorted(mo, key=lambda m: int(m.group(2)))
            j = 0
            list_candidate = []
            # Keep the leading run of markers numbered 1, 2, 3, ...; each
            # item is (ordinal, matched marker text, start offset).
            while j == int(sorted_up[j].group(2)) - 1:
                list_candidate.append((j+1, sorted_up[j].group(0),
                                       sorted_up[j].start()))
                if j+1 < len(sorted_up):
                    j += 1
                else:
                    break
            if len(list_candidate) > 1:
                # A 'question' is only split into a list when its text
                # contains one of the two keywords below; every other
                # field is always split.
                if (element[0] != 'question' or
                        (element[0] == 'question'
                         and 'дуплет' in element[1].lower()
                         or 'блиц' in element[1].lower())):
                    part = partition(element[1], [x[2] for x in
                                                  list_candidate])
                    lc = 0
                    # Remove each list marker from the piece it starts.
                    while lc < len(list_candidate):
                        part[lc+1] = part[lc+1].replace(
                            list_candidate[lc][1], '')
                        lc += 1
                    # Result is [preamble, [items...]] or just the items
                    # when there is no text before the first marker.
                    element[1] = ([part[0], part[1:]] if part[0] != ''
                                  else part[1:])

        # turn source into list if necessary
        if (element[0] == 'source' and isinstance(element[1], basestring)
                and len(re.split(r'\r?\n', element[1])) > 1):
            element[1] = [re_number.sub('', remove_excessive_whitespace(x))
                          for x in re.split(r'\r?\n', element[1])]

    # 6.
    final_structure = []
    current_question = {}

    for element in self.structure:
        # A 'tour', 'question' or 'meta' entry closes the question that
        # is currently being accumulated, if there is one.
        if element[0] in set(['tour', 'question', 'meta']):
            if current_question != {}:
                check_question(current_question)
                final_structure.append(Question(**current_question))
            current_question = {}
        if element[0] in QUESTION_LABELS:
            if element[0] in current_question:
                # A repeated label is appended to the existing value,
                # separated by SEP.
                try:
                    current_question[element[0]] += SEP + element[1]
                except:
                    # NOTE(review): on failure this prints the question
                    # and drops into the interactive debugger
                    # (pdb.set_trace()) -- looks like leftover debugging.
                    print('{}'.format(
                        current_question).decode('unicode_escape'))
                    pdb.set_trace()
            else:
                current_question[element[0]] = element[1]
        else:
            # Entries that are not question fields are kept as
            # [label, content] pairs.
            final_structure.append([element[0], element[1]])

    # Flush the last accumulated question.
    if current_question != {}:
        check_question(current_question)
        final_structure.append(Question(**current_question))

    # 7.
    self.structure = final_structure