# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: # Copyright 2014 Florian Bruhin (The Compiler) # # This file is part of qutebrowser. # # qutebrowser is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # qutebrowser is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with qutebrowser. If not, see . """Our own fork of shlex.split with some added and removed features.""" from qutebrowser.utils import log class ShellLexer: """A lexical analyzer class for simple shell-like syntaxes. Based on Python's shlex, but cleaned up, removed some features, and added some features useful for qutebrowser. Attributes: FIXME """ def __init__(self, s): self.iterator = iter(s) self.whitespace = ' \t\r' self.quotes = '\'"' self.escape = '\\' self.escapedquotes = '"' self.keep = False def reset(self): self.quoted = False self.escapedstate = ' ' self.token = '' self.state = ' ' def __iter__(self): """Read a raw token from the input stream.""" self.quoted = False self.escapedstate = ' ' self.token = '' self.state = ' ' while True: try: nextchar = next(self.iterator) except StopIteration: nextchar = None log.shlexer.vdebug("in state {!r} I see character: {!r}".format( self.state, nextchar)) if nextchar is None: if self.state in self.escape and not self.keep: self.token += self.state if self.token or self.quoted: yield self.token return elif self.state == ' ': if nextchar in self.whitespace: log.shlexer.vdebug("I see whitespace in whitespace state") if self.keep: self.token += nextchar if self.token or self.quoted: yield self.token self.reset() elif nextchar in self.escape: if self.keep: self.token += nextchar self.escapedstate = 'a' self.state = nextchar elif nextchar in self.quotes: if self.keep: self.token += nextchar self.state = nextchar else: self.token = nextchar self.state = 'a' elif self.state in self.quotes: self.quoted = True if nextchar == self.state: if self.keep: self.token += nextchar self.state = 'a' elif (nextchar in self.escape and self.state in self.escapedquotes): if self.keep: self.token += nextchar self.escapedstate = self.state self.state = nextchar else: self.token += nextchar elif self.state in self.escape: # In posix shells, only the quote itself or the escape # character may be escaped within quotes. if (self.escapedstate in self.quotes and nextchar != self.state and nextchar != self.escapedstate and not self.keep): self.token += self.state self.token += nextchar self.state = self.escapedstate elif self.state == 'a': if nextchar in self.whitespace: log.shlexer.vdebug("shlex: I see whitespace in word state") self.state = ' ' if self.keep: self.token += nextchar if self.token or self.quoted: yield self.token self.reset() elif nextchar in self.quotes: if self.keep: self.token += nextchar self.state = nextchar elif nextchar in self.escape: if self.keep: self.token += nextchar self.escapedstate = 'a' self.state = nextchar else: self.token += nextchar def split(s, keep=False): """Split a string via ShellLexer. Args: keep: Whether to keep are special chars in the split output. """ lexer = ShellLexer(s) lexer.keep = keep tokens = list(lexer) if not tokens: return [] out = [] if tokens[0].isspace(): out.append(tokens[0] + tokens[1]) tokens = tokens[2:] for t in tokens: if t.isspace(): out[-1] += t else: out.append(t) return out