1 """Text wrapping and filling.
2 """
3
4
5
6
7
8 __revision__ = "$Id: textwrap.py,v 1.32.8.2 2004/05/13 01:48:15 gward Exp $"
9
10 import string, re
11
# Compatibility shims for interpreters that lack some builtins.

# Where there is no 'unicode' builtin, define an empty placeholder class so
# that isinstance(text, unicode) is simply false instead of raising
# NameError.
try:
    unicode
except NameError:
    class unicode:
        pass

# Very old interpreters have no True/False builtins.  The fallback is
# written through globals() because a literal assignment to True/False is a
# syntax error on Python 3 and would stop the whole module from compiling.
try:
    True, False
except NameError:
    globals().update({'True': 1, 'False': 0})
25
26 __all__ = ['TextWrapper', 'wrap', 'fill']
27
28
29
30
31
32
33
34
35
# Hard-coded set of characters treated as whitespace: tab, newline,
# vertical tab, form feed, carriage return and space.
_whitespace = '\t\n\x0b\x0c\r '


class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 1 .. 8 spaces, depending on its position in
        its line.  If false, each tab is treated as a single character.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
    """

    # Ordinal -> ordinal mapping that turns every character of _whitespace
    # into a space.  This is the table form that unicode.translate() (and
    # str.translate() on Python 3) expects.
    unicode_whitespace_trans = {}
    uspace = ord(' ')
    for x in map(ord, _whitespace):
        unicode_whitespace_trans[x] = uspace

    # 8-bit strings need the 256-character table built by
    # string.maketrans().  Where that function no longer exists (Python 3),
    # plain strings are unicode and take the ordinal mapping built above.
    try:
        whitespace_trans = string.maketrans(_whitespace,
                                            ' ' * len(_whitespace))
    except AttributeError:
        whitespace_trans = unicode_whitespace_trans

    # Regex used by _split() to cut text into chunks.  The three
    # alternatives are: a run of whitespace; a word ending in a hyphen that
    # is followed by more word characters; a run of two or more hyphens
    # between word-ish characters.  The whole pattern is one capturing
    # group so that re.split() keeps the separators in its result.
    try:
        wordsep_re = re.compile(r'(\s+|'
                                r'[^\s\w]*\w{2,}-(?=\w{2,})|'
                                r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')
    except re.error:
        # Simpler pattern for regex engines that reject the one above.
        wordsep_re = re.compile(r'(\s+|'
                                r'-*\w{2,}-(?=\w{2,}))')

    # Matches a lowercase letter, then sentence-ending punctuation, then an
    # optional closing quote.  string.lowercase is used when the
    # interpreter still provides it; otherwise string.ascii_lowercase.
    sentence_end_re = re.compile(r'[%s]'
                                 r'[\.\!\?]'
                                 r'[\"\']?'
                                 % (getattr(string, 'lowercase', None)
                                    or string.ascii_lowercase))

    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True):
        """Store the wrapping options described in the class docstring."""
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words

    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs (if 'expand_tabs' is set)
        and convert every other whitespace character to a space (if
        'replace_whitespace' is set).  For example, the text
        space-foo-TAB-bar-NEWLINE-NEWLINE-baz becomes " foo    bar  baz".
        """
        if self.expand_tabs:
            text = text.expandtabs()
        if self.replace_whitespace:
            # Pick the translation table that matches the string type;
            # anything that is neither kind of string is left untouched.
            if type(text) == type(''):
                text = text.translate(self.whitespace_trans)
            elif isinstance(text, unicode):
                text = text.translate(self.unicode_whitespace_trans)
        return text

    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        """
        chunks = self.wordsep_re.split(text)
        # re.split() leaves empty strings between adjacent separators.
        # Build a real list (not a lazy filter object) because callers
        # index into the result and delete from it.
        return [chunk for chunk in chunks if chunk]

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text has a newline between "foo." and "Bar",
        _munge_whitespace() and _split() turn that into
        [..., "foo.", " ", "Bar", ...], which has one too few spaces;
        this method simply changes the one space to two.  'chunks' is
        modified in place; nothing is returned.
        """
        i = 0
        pat = self.sentence_end_re
        while i < len(chunks) - 1:
            if chunks[i + 1] == " " and pat.search(chunks[i]):
                chunks[i + 1] = "  "
                # Skip the whitespace chunk that was just widened.
                i = i + 2
            else:
                i = i + 1

    def _handle_long_word(self, chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.  Both 'chunks' and 'cur_line'
        are modified in place.
        """
        # Always allow at least one character so that the caller makes
        # progress even when no room is left on the line.
        space_left = max(width - cur_len, 1)

        if self.break_long_words:
            # Put as much of the chunk as fits on the current line and
            # leave the remainder at the head of 'chunks'.
            cur_line.append(chunks[0][0:space_left])
            chunks[0] = chunks[0][space_left:]

        elif not cur_line:
            # Not allowed to break the word and the line is empty: give
            # the word a (too long) line of its own.
            cur_line.append(chunks.pop(0))

        # Otherwise the word may not be broken and the current line
        # already has content: do nothing, so the word is retried at the
        # start of the next line.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks are dropped from the end of every line and
        from the start of every line except the first; apart from that
        whitespace is preserved.

        'chunks' is consumed (emptied) in the process.  Raises
        ValueError if 'self.width' is not positive.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        while chunks:

            # Chunks collected for the line being built, and the sum of
            # their lengths.
            cur_line = []
            cur_len = 0

            # The first line gets its own indent.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Room left for text once the indent is accounted for.
            width = self.width - len(indent)

            # Drop leading whitespace, but never on the very first line.
            if chunks[0].strip() == '' and lines:
                del chunks[0]

            while chunks:
                l = len(chunks[0])

                if cur_len + l <= width:
                    # The chunk fits: move it onto the current line.
                    cur_line.append(chunks.pop(0))
                    cur_len = cur_len + l
                else:
                    # The line is full.
                    break

            # The next chunk would not fit on any line by itself.
            if chunks and len(chunks[0]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # Drop trailing whitespace.
            if cur_line and cur_line[-1].strip() == '':
                del cur_line[-1]

            # A line that consisted only of whitespace produces no output.
            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  By default tabs in 'text' are expanded and all other
        whitespace characters (including newline) are converted to
        space.
        """
        text = self._munge_whitespace(text)
        chunks = self._split(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))
303
304
305
306
def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Reformat the single paragraph in 'text' so it fits in lines of no
    more than 'width' columns, and return a list of wrapped lines.  By
    default, tabs in 'text' are expanded and all other whitespace
    characters (including newline) are converted to space.  See
    TextWrapper class for available keyword args to customize wrapping
    behaviour.
    """
    # Direct keyword call instead of apply(), which newer interpreters no
    # longer provide.
    wrapper = TextWrapper(width=width, **kwargs)
    return wrapper.wrap(text)
321
def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Reformat the single paragraph in 'text' to fit in lines of no more
    than 'width' columns, and return a new string containing the entire
    wrapped paragraph.  As with wrap(), tabs are expanded and other
    whitespace characters converted to space.  See TextWrapper class for
    available keyword args to customize wrapping behaviour.
    """
    # Direct keyword call instead of apply(), which newer interpreters no
    # longer provide.
    wrapper = TextWrapper(width=width, **kwargs)
    return wrapper.fill(text)
335
336
337
338
def dedent(text):
    r"""dedent(text : string) -> string

    Remove any whitespace that can be uniformly removed from the left
    of every line in `text`.

    This can be used e.g. to make triple-quoted strings line up with
    the left edge of screen/whatever, while still presenting it in the
    source code in indented form.

    Tabs are expanded before the common margin is measured, and lines
    that contain only whitespace are ignored when measuring it.

    For example:

        def test():
            # end first line with \ to avoid the empty line!
            s = '''\
            hello
              world
            '''
            print(repr(s))          # prints '    hello\n      world\n    '
            print(repr(dedent(s)))  # prints 'hello\n  world\n'
    """
    lines = text.expandtabs().split('\n')

    # Find the smallest indentation among lines that have visible content.
    margin = None
    for line in lines:
        content = line.lstrip()
        if not content:
            continue
        indent = len(line) - len(content)
        if margin is None:
            margin = indent
        else:
            margin = min(margin, indent)

    # Slicing also trims whitespace-only lines, which may be shorter than
    # the margin.
    if margin is not None and margin > 0:
        lines = [line[margin:] for line in lines]

    return '\n'.join(lines)
377
378
379
380
381
382
383