Assume whitespace_split=True, get rid of wordchars/pushback

Florian Bruhin 2014-11-03 21:48:10 +01:00
parent 9180a8b0bb
commit cb76a100c2
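The wordchars table and the pushback deque only mattered while whitespace_split was False, and _get_lexer() always flipped it to True before handing the lexer out (see the last hunk below), so those code paths were effectively dead. For a sense of the semantics this commit bakes in, here is a minimal illustration using the stdlib shlex this module was forked from (stdlib behaviour, not the forked code itself):

    import shlex

    # With whitespace_split=True, only whitespace (plus quotes/escapes)
    # delimits tokens; punctuation such as ':' or '/' stays in the word.
    lexer = shlex.shlex('open -t "http://example.com/?q=a b"', posix=True)
    lexer.whitespace_split = True
    print(list(lexer))  # ['open', '-t', 'http://example.com/?q=a b']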


@@ -19,8 +19,6 @@
 """Our own fork of shlex.split with some added and removed features."""
-from collections import deque
 from io import StringIO
@@ -38,17 +36,11 @@ class ShellLexer:
     def __init__(self, s):
         self.instream = StringIO(s)
         self.eof = None
-        self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
-                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
-        self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
-                           'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
         self.whitespace = ' \t\r\n'
-        self.whitespace_split = False
         self.quotes = '\'"'
         self.escape = '\\'
         self.escapedquotes = '"'
         self.state = ' '
-        self.pushback = deque()
         self.debug = 0
         self.token = ''
         if self.debug:
@@ -56,12 +48,6 @@ class ShellLexer:
 
     def get_token(self):
         "Get a token from the input stream (or from stack if it's nonempty)"
-        if self.pushback:
-            tok = self.pushback.popleft()
-            if self.debug >= 1:
-                print("shlex: popping token " + repr(tok))
-            return tok
-        # No pushback. Get a token.
         raw = self.read_token()
         # Maybe we got EOF instead?
         if raw == self.eof:
@@ -100,20 +86,11 @@ class ShellLexer:
                 elif nextchar in self.escape:
                     escapedstate = 'a'
                     self.state = nextchar
-                elif nextchar in self.wordchars:
-                    self.token = nextchar
-                    self.state = 'a'
                 elif nextchar in self.quotes:
                     self.state = nextchar
-                elif self.whitespace_split:
-                    self.token = nextchar
-                    self.state = 'a'
                 else:
                     self.token = nextchar
-                    if self.token or quoted:
-                        break    # emit current token
-                    else:
-                        continue
+                    self.state = 'a'
             elif self.state in self.quotes:
                 quoted = True
                 if not nextchar:    # end of file
@@ -159,18 +136,8 @@ class ShellLexer:
                 elif nextchar in self.escape:
                     escapedstate = 'a'
                     self.state = nextchar
-                elif (nextchar in self.wordchars or nextchar in self.quotes or
-                        self.whitespace_split):
-                    self.token = self.token + nextchar
                 else:
-                    self.pushback.appendleft(nextchar)
-                    if self.debug >= 2:
-                        print("shlex: I see punctuation in word state")
-                    self.state = ' '
-                    if self.token:
-                        break    # emit current token
-                    else:
-                        continue
+                    self.token = self.token + nextchar
         result = self.token
         self.token = ''
         if not quoted and result == '':
@@ -197,7 +164,6 @@ def _get_lexer(s):
     if s is None:
         raise TypeError("Refusing to create a lexer with s=None!")
     lexer = ShellLexer(s)
-    lexer.whitespace_split = True
     return lexer
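With the pushback deque gone, get_token() is a thin wrapper that forwards read_token() results until the eof sentinel (None, per __init__ above). A consumer therefore only needs a loop like the following sketch; the _tokens helper is illustrative, assumes it sits next to _get_lexer() in the same module, and is not part of this commit:

    def _tokens(s):
        """Yield every token of s (illustrative sketch, not module code)."""
        lexer = _get_lexer(s)
        while True:
            token = lexer.get_token()
            if token == lexer.eof:  # eof is None for this lexer
                return
            yield token

    # list(_tokens('open -t foo'))  ->  ['open', '-t', 'foo']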