1
2 """A lexical analyzer class for simple shell-like syntaxes."""
3
4
5
6
7
8
9
10 import os.path
11 import sys
12
13
22 return self.data.pop(0)
23
# Choose the is_basestring() implementation once, at import time, depending
# on whether this interpreter provides the ``basestring`` builtin.
try:
    basestring
except NameError:
    import types
    # Older Python 2 releases expose ``types.StringTypes`` (str and unicode);
    # interpreters that have neither it nor ``basestring`` only have ``str``.
    _string_types = getattr(types, 'StringTypes', (str,))

    def is_basestring(s):
        "Return true if s is a string; instances of subclasses count too."
        # isinstance() rather than an exact type() comparison so that this
        # branch agrees with the ``basestring`` branch below.
        return isinstance(s, _string_types)
else:
    def is_basestring(s):
        "Return true if s is an instance of basestring."
        return isinstance(s, basestring)
33
# Prefer the C implementation, then the pure-Python module; interpreters
# that ship neither (Python 3) provide an equivalent class in ``io``.
try:
    from cStringIO import StringIO
except ImportError:
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
38
39 __all__ = ["shlex", "split"]
40
42 "A lexical analyzer class for simple shell-like syntaxes."
def __init__(self, instream=None, infile=None, posix=False):
    """Initialize the lexer state.

    instream -- input to tokenize: a string (wrapped in a StringIO) or a
                file-like object.  When None, sys.stdin is read and
                infile is ignored.
    infile   -- name associated with instream; stored as self.infile.
    posix    -- when true, end of input is reported as None instead of
                '' and the accented Latin-1 letters are added to
                wordchars.
    """
    if is_basestring(instream):
        instream = StringIO(instream)
    if instream is not None:
        self.instream = instream
        self.infile = infile
    else:
        self.instream = sys.stdin
        self.infile = None
    self.posix = posix
    if posix:
        self.eof = None
    else:
        self.eof = ''
    self.commenters = '#'
    # wordchars is only used for membership tests, so the order of the
    # letters inside the literal does not matter.
    self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                      'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
    if self.posix:
        self.wordchars = self.wordchars + ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
                                           'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
    self.whitespace = ' \t\r\n'
    self.whitespace_split = False
    self.quotes = '\'"'
    self.escape = '\\'
    self.escapedquotes = '"'
    # Scanner state: ' ' between tokens, 'a' inside a word, a quote or
    # escape character while inside that construct, None after EOF.
    self.state = ' '
    self.pushback = deque()
    self.lineno = 1
    self.debug = 0
    self.token = ''
    self.filestack = deque()
    self.source = None
    # debug was set to 0 just above, so this trace only fires if that
    # assignment is edited.  print is called with a single parenthesized
    # string so the line parses as a statement and as a function call.
    if self.debug:
        print('shlex: reading from %s, line %d'
              % (self.instream, self.lineno))
78
def push_token(self, tok):
    """Push a token onto the stack popped by the get_token method.

    tok is placed at the front of self.pushback, so it is the next
    token get_token returns.
    """
    if self.debug >= 1:
        # single parenthesized argument: valid as statement and as call
        print("shlex: pushing token " + repr(tok))
    self.pushback.appendleft(tok)
84
def push_source(self, newstream, newfile=None):
    """Push an input source onto the lexer's input source stack.

    newstream -- a string (wrapped in a StringIO) or a file-like object
                 that becomes the current input.
    newfile   -- name recorded as self.infile for the new input.

    The current (infile, instream, lineno) triple is saved on
    self.filestack and the line counter restarts at 1.
    """
    if is_basestring(newstream):
        newstream = StringIO(newstream)
    self.filestack.appendleft((self.infile, self.instream, self.lineno))
    self.infile = newfile
    self.instream = newstream
    self.lineno = 1
    if self.debug:
        # single parenthesized argument: valid as statement and as call
        if newfile is not None:
            print('shlex: pushing to file %s' % (self.infile,))
        else:
            print('shlex: pushing to stream %s' % (self.instream,))
98
def pop_source(self):
    """Pop the input source stack.

    Closes the current input stream, restores the most recently saved
    (infile, instream, lineno) triple from self.filestack and resets the
    scanner to the between-tokens state.
    """
    self.instream.close()
    (self.infile, self.instream, self.lineno) = self.filestack.popleft()
    if self.debug:
        # single parenthesized argument: valid as statement and as call
        print('shlex: popping to %s, line %d'
              % (self.instream, self.lineno))
    self.state = ' '
107
def get_token(self):
    """Get a token from the input stream (or from stack if it's nonempty).

    Returns the next token, or self.eof once the current input and every
    stacked input source are exhausted.
    """
    if self.pushback:
        tok = self.pushback.popleft()
        if self.debug >= 1:
            print("shlex: popping token " + repr(tok))
        return tok
    # No pushback.  Get a token.
    raw = self.read_token()
    # Handle inclusions: the token after the ``source`` keyword is given
    # to sourcehook(), whose result is pushed as the new input.
    if self.source is not None:
        while raw == self.source:
            spec = self.sourcehook(self.read_token())
            if spec:
                (newfile, newstream) = spec
                self.push_source(newstream, newfile)
            raw = self.get_token()
    # Maybe we got EOF instead?  Fall back to the enclosing source, if any.
    while raw == self.eof:
        if not self.filestack:
            return self.eof
        else:
            self.pop_source()
            raw = self.get_token()
    # Neither inclusion nor EOF
    if self.debug >= 1:
        # single parenthesized argument: valid as statement and as call
        if raw != self.eof:
            print("shlex: token=" + repr(raw))
        else:
            print("shlex: token=EOF")
    return raw
139
def read_token(self):
    """Read one raw token from self.instream.

    The pushback stack is not consulted and ``source`` inclusion is not
    handled here; get_token() does both.  Returns the token string.  At
    end of input the result is '' (None in posix mode when nothing
    quoted was read).

    Raises ValueError when the input ends inside a quoted string or
    directly after an escape character.
    """
    quoted = False
    escapedstate = ' '
    while True:
        nextchar = self.instream.read(1)
        if nextchar == '\n':
            self.lineno = self.lineno + 1
        if self.debug >= 3:
            # one pre-formatted string keeps the output identical to a
            # comma-separated print while also parsing as a function call
            print("shlex: in state %s I see character: %s"
                  % (repr(self.state), repr(nextchar)))
        if self.state is None:
            self.token = ''        # past end of file
            break
        elif self.state == ' ':
            if not nextchar:
                self.state = None  # end of file
                break
            elif nextchar in self.whitespace:
                if self.debug >= 2:
                    print("shlex: I see whitespace in whitespace state")
                if self.token or (self.posix and quoted):
                    break   # emit current token
                else:
                    continue
            elif nextchar in self.commenters:
                # discard the rest of the line
                self.instream.readline()
                self.lineno = self.lineno + 1
            elif self.posix and nextchar in self.escape:
                escapedstate = 'a'
                self.state = nextchar
            elif nextchar in self.wordchars:
                self.token = nextchar
                self.state = 'a'
            elif nextchar in self.quotes:
                # non-posix mode keeps the quote characters in the token
                if not self.posix:
                    self.token = nextchar
                self.state = nextchar
            elif self.whitespace_split:
                self.token = nextchar
                self.state = 'a'
            else:
                # any other character is a one-character token
                self.token = nextchar
                if self.token or (self.posix and quoted):
                    break   # emit current token
                else:
                    continue
        elif self.state in self.quotes:
            quoted = True
            if not nextchar:      # end of file
                if self.debug >= 2:
                    print("shlex: I see EOF in quotes state")
                raise ValueError("No closing quotation")
            if nextchar == self.state:
                if not self.posix:
                    self.token = self.token + nextchar
                    self.state = ' '
                    break
                else:
                    # posix: the closing quote may be followed by more
                    # characters of the same word
                    self.state = 'a'
            elif (self.posix and nextchar in self.escape and
                  self.state in self.escapedquotes):
                escapedstate = self.state
                self.state = nextchar
            else:
                self.token = self.token + nextchar
        elif self.state in self.escape:
            if not nextchar:      # end of file
                if self.debug >= 2:
                    print("shlex: I see EOF in escape state")
                raise ValueError("No escaped character")
            # Inside quotes only the quote itself or the escape character
            # may be escaped; otherwise the escape character is kept.
            if (escapedstate in self.quotes and
                    nextchar != self.state and nextchar != escapedstate):
                self.token = self.token + self.state
            self.token = self.token + nextchar
            self.state = escapedstate
        elif self.state == 'a':
            if not nextchar:
                self.state = None   # end of file
                break
            elif nextchar in self.whitespace:
                if self.debug >= 2:
                    print("shlex: I see whitespace in word state")
                self.state = ' '
                if self.token or (self.posix and quoted):
                    break   # emit current token
                else:
                    continue
            elif nextchar in self.commenters:
                # discard the rest of the line
                self.instream.readline()
                self.lineno = self.lineno + 1
                if self.posix:
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
            elif self.posix and nextchar in self.quotes:
                self.state = nextchar
            elif self.posix and nextchar in self.escape:
                escapedstate = 'a'
                self.state = nextchar
            elif (nextchar in self.wordchars or nextchar in self.quotes
                  or self.whitespace_split):
                self.token = self.token + nextchar
            else:
                # punctuation ends the word and is queued on pushback,
                # so get_token returns it as the following token
                self.pushback.appendleft(nextchar)
                if self.debug >= 2:
                    print("shlex: I see punctuation in word state")
                self.state = ' '
                if self.token:
                    break   # emit current token
                else:
                    continue
    result = self.token
    self.token = ''
    if self.posix and not quoted and result == '':
        result = None
    if self.debug > 1:
        if result:
            print("shlex: raw token=" + repr(result))
        else:
            print("shlex: raw token=EOF")
    return result
267
def sourcehook(self, newfile):
    """Hook called on a filename to be sourced.

    newfile -- the token read after the ``source`` keyword; one pair of
               surrounding double quotes is stripped.

    Returns a (filename, open file object) tuple.  A relative filename
    is resolved against the directory of the current input file when
    self.infile is a string.
    """
    # Slice instead of index so an empty filename reaches open() and
    # fails with its file error rather than an IndexError here.
    if newfile[:1] == '"':
        newfile = newfile[1:-1]
    # This implements cpp-like semantics for relative-path inclusion.
    if is_basestring(self.infile) and not os.path.isabs(newfile):
        newfile = os.path.join(os.path.dirname(self.infile), newfile)
    return (newfile, open(newfile, "r"))
276
def error_leader(self, infile=None, lineno=None):
    """Emit a C-compiler-like, Emacs-friendly error-message leader.

    infile and lineno default to the lexer's current self.infile and
    self.lineno when they are None.
    """
    where = infile
    if where is None:
        where = self.infile
    line = lineno
    if line is None:
        line = self.lineno
    return "\"%s\", line %d: " % (where, line)
284
287
def next(self):
    """Return the next token for the iterator protocol.

    Raises StopIteration once get_token() reports self.eof.
    """
    token = self.get_token()
    if token == self.eof:
        raise StopIteration
    return token

# Newer interpreters look the iterator method up under this name.
__next__ = next
293
307
if __name__ == '__main__':
    # Small driver: tokenize the file named by the first command-line
    # argument, or standard input when no argument is given, and print
    # each token.
    stream = None
    if len(sys.argv) == 1:
        lexer = shlex()
    else:
        filename = sys.argv[1]
        stream = open(filename)
        lexer = shlex(stream, filename)
    try:
        while True:
            tt = lexer.get_token()
            if not tt:
                # a false token ('' or None) marks the end of the input
                break
            # single parenthesized argument: valid as statement and as call
            print("Token: " + repr(tt))
    finally:
        # the file opened above is ours to close; stdin is left alone
        if stream is not None:
            stream.close()
320
321
322
323
324
325
326