def test_non_ascii():
    _test_non_ascii = u"insert into test (id, name) values (1, 'тест');"

    s = _test_non_ascii
    stmts = bsqlparse.parse(s)
    assert len(stmts) == 1
    statement = stmts[0]
    assert text_type(statement) == s
    assert statement._pprint_tree() is None

    s = _test_non_ascii.encode('utf-8')
    stmts = bsqlparse.parse(s, 'utf-8')
    assert len(stmts) == 1
    statement = stmts[0]
    assert text_type(statement) == _test_non_ascii
    assert statement._pprint_tree() is None

def group_tokens(self, grp_cls, start, end, include_end=True, extend=False):
    """Replace tokens by an instance of *grp_cls*."""
    start_idx = start
    start = self.tokens[start_idx]

    end_idx = end + include_end

    # will be needed later for new group_clauses
    # while skip_ws and tokens and tokens[-1].is_whitespace:
    #     tokens = tokens[:-1]

    if extend and isinstance(start, grp_cls):
        subtokens = self.tokens[start_idx + 1:end_idx]

        grp = start
        grp.tokens.extend(subtokens)
        del self.tokens[start_idx + 1:end_idx]
        grp.value = text_type(start)
    else:
        subtokens = self.tokens[start_idx:end_idx]
        grp = grp_cls(subtokens)
        self.tokens[start_idx:end_idx] = [grp]

    grp.parent = self
    for token in subtokens:
        token.parent = grp

    return grp

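# A minimal usage sketch for group_tokens, assuming bsqlparse mirrors
# sqlparse's public API (bsqlparse.parse, sql.TokenList). Grouping the full
# index range collapses the statement's tokens into a single group node
# whose parent is the statement.
import bsqlparse
from bsqlparse import sql

parsed = bsqlparse.parse('select foo from bar')[0]
grp = parsed.group_tokens(sql.TokenList, 0, len(parsed.tokens) - 1)
assert parsed.tokens == [grp]
assert grp.parent is parsed
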
def _process_case(self, tlist):
    offset_ = len('case ') + len('when ')
    cases = tlist.get_cases(skip_ws=True)
    # align the end as well
    end_token = tlist.token_next_by(m=(T.Keyword, 'END'))[1]
    cases.append((None, [end_token]))

    condition_width = [
        len(' '.join(map(text_type, cond))) if cond else 0
        for cond, _ in cases]
    max_cond_width = max(condition_width)

    for i, (cond, value) in enumerate(cases):
        # cond is None for 'else' and 'end'
        stmt = cond[0] if cond else value[0]

        if i > 0:
            tlist.insert_before(stmt, self.nl(
                offset_ - len(text_type(stmt))))
        if cond:
            ws = sql.Token(T.Whitespace, self.char * (
                max_cond_width - condition_width[i]))
            tlist.insert_after(cond[-1], ws)

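# Hedged sketch of the observable effect, assuming bsqlparse exposes
# sqlparse's reindent_aligned option, which routes CASE expressions through
# _process_case. The exact padding may vary between versions, so the output
# is printed rather than asserted; WHEN conditions are padded to equal width.
import bsqlparse

print(bsqlparse.format(
    "select case when x > 0 then 'pos' when x < 0 then 'neg' "
    "else 'zero' end from t",
    reindent_aligned=True))
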
def test_split_comment_with_umlaut():
    sql = (u'select * from foo;\n'
           u'-- Testing an umlaut: ä\n'
           u'select * from bar;')
    stmts = bsqlparse.parse(sql)
    assert len(stmts) == 2
    assert ''.join(text_type(q) for q in stmts) == sql

def __init__(self, ttype, value):
    value = text_type(value)
    self.value = value
    self.ttype = ttype
    self.parent = None
    self.is_group = False
    self.is_keyword = ttype in T.Keyword
    self.is_whitespace = self.ttype in T.Whitespace
    self.normalized = value.upper() if self.is_keyword else value

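# A small sketch of the Token invariants set up above, assuming the
# bsqlparse.sql and bsqlparse.tokens modules mirror sqlparse. Keyword tokens
# get an upper-cased `normalized` value; other tokens keep the raw value.
from bsqlparse import sql
from bsqlparse import tokens as T

kw = sql.Token(T.Keyword.DML, 'select')
assert kw.is_keyword and kw.normalized == 'SELECT'

ws = sql.Token(T.Whitespace, '   ')
assert ws.is_whitespace and ws.normalized == '   '
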
def split(sql, encoding=None):
    """Split *sql* into single statements.

    :param sql: A string containing one or more SQL statements.
    :param encoding: The encoding of the statement (optional).
    :returns: A list of strings.
    """
    stack = engine.FilterStack()
    return [text_type(stmt).strip() for stmt in stack.run(sql, encoding)]

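# Usage sketch for the module-level split(), assuming bsqlparse exports it
# the same way sqlparse does: each statement comes back as a stripped string.
import bsqlparse

assert bsqlparse.split('select 1; select 2;') == ['select 1;', 'select 2;']
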
def process(self, stmt):
    self.count += 1
    if self.count > 1:
        varname = u'{f.varname}{f.count}'.format(f=self)
    else:
        varname = self.varname

    has_nl = len(text_type(stmt).strip().splitlines()) > 1
    stmt.tokens = self._process(stmt.tokens, varname, has_nl)
    return stmt

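# This output filter backs the output_format option; a hedged sketch,
# assuming bsqlparse supports sqlparse's output_format='python'. The first
# statement keeps the bare varname, later ones get the count appended
# (sql, sql2, ...). Printed rather than asserted, since exact whitespace
# handling may vary.
import bsqlparse

print(bsqlparse.format('select 1; select 2;', output_format='python'))
# expected shape:
# sql = 'select 1; '
# sql2 = 'select 2;'
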
def process(self, stmt):
    self._curr_stmt = stmt
    self._process(stmt)

    if self._last_stmt is not None:
        nl = '\n' if text_type(self._last_stmt).endswith('\n') else '\n\n'
        stmt.tokens.insert(0, sql.Token(T.Whitespace, nl))

    self._last_stmt = stmt
    return stmt

def _split_kwds(self, tlist):
    tidx, token = self._next_token(tlist)
    while token:
        # joins are a special case: only consider the first word
        # as the aligner
        if token.match(T.Keyword, self.join_words, regex=True):
            token_indent = token.value.split()[0]
        else:
            token_indent = text_type(token)
        tlist.insert_before(token, self.nl(token_indent))
        tidx += 1
        tidx, token = self._next_token(tlist, tidx)

def _split_kwds(self, tlist):
    tidx, token = self._next_token(tlist)
    while token:
        pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
        uprev = text_type(prev_)

        if prev_ and prev_.is_whitespace:
            del tlist.tokens[pidx]
            tidx -= 1

        if not (uprev.endswith('\n') or uprev.endswith('\r')):
            tlist.insert_before(tidx, self.nl())
            tidx += 1

        tidx, token = self._next_token(tlist, tidx)

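# The two _split_kwds variants above drive keyword splitting for the
# reindent modes; a hedged end-to-end sketch, assuming bsqlparse supports
# sqlparse's reindent option:
import bsqlparse

print(bsqlparse.format('select * from foo where bar = 1', reindent=True))
# select *
# from foo
# where bar = 1
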
def split_unquoted_newlines(stmt):
    """Split a string on all unquoted newlines.

    Unlike str.splitlines(), this will ignore CR/LF/CR+LF if the requisite
    character is inside of a string."""
    text = text_type(stmt)
    lines = SPLIT_REGEX.split(text)
    outputlines = ['']
    for line in lines:
        if not line:
            continue
        elif LINE_MATCH.match(line):
            outputlines.append('')
        else:
            outputlines[-1] += line
    return outputlines

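# Hedged example, assuming SPLIT_REGEX treats quoted strings as opaque
# chunks and LINE_MATCH matches bare CR/LF/CR+LF, as in sqlparse's utils:
# the newline inside the string literal is kept, and only the unquoted
# newline between the statements starts a new output line.
text = "select 'a\nb' as col;\nselect 2;"
assert split_unquoted_newlines(text) == ["select 'a\nb' as col;",
                                         'select 2;']
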
def _process(self, group, stream):
    for token in stream:
        if token.is_whitespace and '\n' in token.value:
            if token.value.endswith('\n'):
                self.line = ''
            else:
                self.line = token.value.splitlines()[-1]
        elif token.is_group and type(token) not in self.keep_together:
            token.tokens = self._process(token, token.tokens)
        else:
            val = text_type(token)
            if len(self.line) + len(val) > self.width:
                match = re.search(r'^ +', self.line)
                if match is not None:
                    indent = match.group()
                else:
                    indent = ''
                yield sql.Token(T.Whitespace, '\n{0}'.format(indent))
                self.line = indent
            self.line += val
        yield token

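# This method belongs to the right-margin filter; a hedged sketch, assuming
# bsqlparse wires it to a right_margin format option (an integer > 10) the
# way sqlparse does. Lines longer than the margin are wrapped, reusing the
# current line's leading indent. Printed, not asserted.
import bsqlparse

print(bsqlparse.format(
    'select col1, col2, col3, col4, col5, col6 from a_long_table_name',
    right_margin=40))
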
def _get_repr_value(self):
    raw = text_type(self)
    # if len(raw) > 100:
    #     raw = raw[:99] + '...'
    return re.sub(r'\s+', ' ', raw)

def __init__(self, tokens=None):
    self.tokens = tokens or []
    # iterate over self.tokens, not the raw argument, so passing
    # tokens=None doesn't raise a TypeError
    for token in self.tokens:
        token.parent = self
    super(TokenList, self).__init__(None, text_type(self))
    self.is_group = True

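# Sketch of the parent wiring done in TokenList.__init__, assuming
# bsqlparse.compat provides text_type the way sqlparse's compat module does.
from bsqlparse import sql
from bsqlparse import tokens as T
from bsqlparse.compat import text_type

toks = [sql.Token(T.Keyword.DML, 'select'), sql.Token(T.Whitespace, ' ')]
tlist = sql.TokenList(toks)
assert all(tok.parent is tlist for tok in toks)
assert text_type(tlist) == 'select '
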
def test_split_create_function(load_file, fn):
    sql = load_file(fn)
    stmts = bsqlparse.parse(sql)
    assert len(stmts) == 1
    assert text_type(stmts[0]) == sql