#
# Copyright (C) 2009-2020 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause

from sqlparse import sql, tokens as T


class StatementSplitter:
    """Filter that splits a token stream at individual statements."""

    def __init__(self):
        self._reset()

    def _reset(self):
        """Set the filter attributes to their default values."""
        self._in_declare = False
        self._is_create = False
        self._begin_depth = 0

        self.consume_ws = False
        self.tokens = []
        self.level = 0

    def _change_splitlevel(self, ttype, value):
        """Get the new split level (increase, decrease or remain equal)."""

        # Parentheses increase/decrease the level.
        if ttype is T.Punctuation and value == '(':
            return 1
        elif ttype is T.Punctuation and value == ')':
            return -1
        elif ttype not in T.Keyword:  # if normal token return
            return 0

        # Everything after here is ttype = T.Keyword.
        # Also note: once an IF statement has been entered, we are done and
        # basically just returning.
        unified = value.upper()

        # Three keyword token types begin with CREATE, but only one of them
        # is DDL. DDL CREATE, though, can contain more words such as
        # "OR REPLACE".
        if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
            self._is_create = True
            return 0

        # Can have a nested DECLARE inside of BEGIN...
        if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
            self._in_declare = True
            return 1

        if unified == 'BEGIN':
            self._begin_depth += 1
            if self._is_create:
                # FIXME(andi): This makes no sense.
                return 1
            return 0

        # Should this respect a preceding BEGIN?
        # In CASE ... WHEN ... END this results in a split level of -1.
        # Would having multiple CASE WHEN END and an assignment operator
        # cause the statement to cut off prematurely?
        if unified == 'END':
            self._begin_depth = max(0, self._begin_depth - 1)
            return -1

        if (unified in ('IF', 'FOR', 'WHILE', 'CASE')
                and self._is_create and self._begin_depth > 0):
            return 1

        if unified in ('END IF', 'END FOR', 'END WHILE'):
            return -1

        # Default
        return 0

    def process(self, stream):
        """Process the stream."""
        EOS_TTYPE = T.Whitespace, T.Comment.Single

        # Run over all stream tokens.
        for ttype, value in stream:
            # Yield the finished statement once the next non-whitespace token
            # arrives. Note that a Newline token does not count as whitespace
            # here; in this context "whitespace" ignores newlines.
            # Why don't multi-line comments also count?
            if self.consume_ws and ttype not in EOS_TTYPE:
                yield sql.Statement(self.tokens)

                # Reset filter and prepare to process the next statement.
                self._reset()

            # Change the current split level (increase, decrease or remain
            # equal).
            self.level += self._change_splitlevel(ttype, value)

            # Append the token to the current statement.
            self.tokens.append(sql.Token(ttype, value))

            # Check if we reached the end of a statement.
            if self.level <= 0 and ttype is T.Punctuation and value == ';':
                self.consume_ws = True

        # Yield the pending statement (if any).
        if self.tokens and not all(t.is_whitespace for t in self.tokens):
            yield sql.Statement(self.tokens)
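
# Minimal usage sketch: in python-sqlparse this splitter is normally driven
# through the higher-level API (sqlparse.split() / sqlparse.parsestream());
# the demo below assumes sqlparse.lexer.tokenize() yields the (ttype, value)
# pairs that process() expects.
if __name__ == '__main__':
    from sqlparse import lexer

    raw = "select 1; select 2;"
    splitter = StatementSplitter()
    # process() yields one sql.Statement per terminated statement.
    for statement in splitter.process(lexer.tokenize(raw)):
        print(repr(str(statement)))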