1 """SCons.Scanner.LaTeX
2
3 This module implements the dependency scanner for LaTeX code.
4
5 """
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 __revision__ = "src/engine/SCons/Scanner/LaTeX.py rel_2.4.0:3365:9259ea1c13d7 2015/09/21 14:03:43 bdbaddog"
31
32 import os.path
33 import re
34
35 import SCons.Scanner
36 import SCons.Util
37
38
# Graphics file suffixes handed to the scanners as ``graphics_extensions``:
# TexGraphics goes to the scanner created for plain latex, LatexGraphics to
# the one created for pdflatex. Each suffix is appended, in list order, to an
# \includegraphics argument that has no extension of its own.
TexGraphics = [
    '.eps',
    '.ps',
]
LatexGraphics = [
    '.pdf',
    '.png',
    '.jpg',
    '.gif',
    '.tif',
]
41
42
45 _null = _Null
46
47
48
49
50
51
77
79 """A class to bind a specific *PATH variable name to a function that
80 will return all of the *path directories."""
82 self.variable = variable
83 - def __call__(self, env, dir=None, target=None, source=None, argument=None):
93
94
95
def LaTeXScanner():
    """Return a prototype Scanner instance for scanning LaTeX source files
    when built with latex.
    """
    return LaTeX(
        name="LaTeXScanner",
        suffixes='$LATEXSUFFIXES',
        # Suffixes tried for an \includegraphics argument without extension.
        graphics_extensions=TexGraphics,
        recursive=0,
    )
106
def PDFLaTeXScanner():
    """Return a prototype Scanner instance for scanning LaTeX source files
    when built with pdflatex.
    """
    return LaTeX(
        name="PDFLaTeXScanner",
        suffixes='$LATEXSUFFIXES',
        # Suffixes tried for an \includegraphics argument without extension.
        graphics_extensions=LatexGraphics,
        recursive=0,
    )
117
class LaTeX(SCons.Scanner.Base):
    """Class for scanning LaTeX files for included files.

    Unlike most scanners, which use regular expressions that just
    return the included file name, this returns a tuple consisting
    of the keyword for the inclusion ("include", "includegraphics",
    "input", "bibliography", ...), and then the file name itself.
    Based on a quick look at LaTeX documentation, it seems that we
    should append .tex suffix for the "include" keywords, append .tex if
    there is no extension for the "input" keyword, and need to add .bib
    for the "bibliography" keyword that does not accept extensions by itself.
    A "usepackage" argument without an extension gets .sty appended.

    Finally, if there is no extension for an "includegraphics" keyword
    latex will append .ps or .eps to find the file, while pdftex may use .pdf,
    .jpg, .tif, .mps, or .png.

    The actual subset and search order may be altered by
    DeclareGraphicsExtensions command.  This complication is ignored.
    The default order corresponds to experimentation with teTeX
        $ latex --version
        pdfeTeX 3.141592-1.21a-2.2 (Web2C 7.5.4)
        kpathsea version 3.5.4
    The order is:
        ['.eps', '.ps'] for latex
        ['.png', '.pdf', '.jpg', '.tif'] for pdflatex.
    NOTE(review): the module-level LatexGraphics list starts with '.pdf'
    and also contains '.gif'; confirm which order is the intended one.

    Another difference is that the search path is determined by the type
    of the file being searched:
    env['TEXINPUTS'] for "input" and "include" keywords
    env['TEXINPUTS'] for "includegraphics" keyword
    env['TEXINPUTS'] for "lstinputlisting" keyword
    env['TEXINPUTS'] for "usepackage" keyword
    env['BIBINPUTS'] for "bibliography" keyword
    env['BIBINPUTS'] for "addbibresource", "addglobalbib" and
                     "addsectionbib" keywords
    env['BSTINPUTS'] for "bibliographystyle" keyword
    env['INDEXSTYLE'] for "makeindex" keyword, no scanning support needed
                      just allows user to set it if needed.

    FIXME: also look for the class or style in document[class|style]{}
    FIXME: also look for the argument of bibliographystyle{}
    """
    # Maps each LaTeX keyword to the name of the path variable whose
    # directories are searched for the files that keyword refers to.
    keyword_paths = {'include': 'TEXINPUTS',
                     'input': 'TEXINPUTS',
                     'includegraphics': 'TEXINPUTS',
                     'bibliography': 'BIBINPUTS',
                     'bibliographystyle': 'BSTINPUTS',
                     'addbibresource': 'BIBINPUTS',
                     'addglobalbib': 'BIBINPUTS',
                     'addsectionbib': 'BIBINPUTS',
                     'makeindex': 'INDEXSTYLE',
                     'usepackage': 'TEXINPUTS',
                     'lstinputlisting': 'TEXINPUTS'}
    # The path-variable names used above, passed through SCons.Util.unique
    # (presumably to drop the repeated names -- confirm against SCons.Util).
    env_variables = SCons.Util.unique(list(keyword_paths.values()))
169
def __init__(self, name, suffixes, graphics_extensions, *args, **kw):
    """Set up the include regexes and the keyword arguments for the base
    scanner, then initialize the base class.

    name                 -- scanner name, forwarded as kw['name']
    suffixes             -- suffixes (or a construction variable that
                            expands to them) of the files to scan;
                            forwarded as kw['skeys'] and used by the
                            scan check
    graphics_extensions  -- suffixes tried for an \\includegraphics
                            argument that has no extension
    *args, **kw          -- passed through to SCons.Scanner.Base.__init__;
                            the keys 'function', 'path_function',
                            'recursive', 'skeys', 'scan_check' and 'name'
                            are always overwritten here
    """
    # The pattern is compiled with re.M, so '^' anchors at every line.
    # '[^%\n]*' keeps a match on a single line and refuses to match a
    # keyword that follows a '%' on that line.  Group 1 is the keyword
    # (possibly with a trailing "[options]" for includegraphics and
    # lstinputlisting), group 2 is the text between the braces.
    include_pattern = r'^[^%\n]*\\(include|includegraphics(?:\s*\[[^\]]+\])?|lstinputlisting(?:\[[^\]]+\])?|input|bibliography|addbibresource|addglobalbib|addsectionbib|usepackage)\s*{([^}]*)}'
    self.cre = re.compile(include_pattern, re.M)
    # Splits a line into the part before the first unescaped '%' and the
    # rest (the comment); '\%' is treated as ordinary text.
    self.comment_re = re.compile(r'^((?:(?:\\%)|[^%\n])*)(.*)$', re.M)

    self.graphics_extensions = graphics_extensions

    def _scan(node, env, path=(), self=self):
        # Scan the repository-resolved file; a missing file has no includes.
        node = node.rfile()
        if node.exists():
            return self.scan_recurse(node, path)
        return []

    class FindMultiPathDirs(object):
        """The stock FindPathDirs function has the wrong granularity:
        it is called once per target, while we need the path that depends
        on what kind of included files is being searched. This wrapper
        hides multiple instances of FindPathDirs, one per the LaTeX path
        variable in the environment. When invoked, the function calculates
        and returns all the required paths as a dictionary (converted into
        a tuple to become hashable). Then the scan function converts it
        back and uses a dictionary of tuples rather than a single tuple
        of paths.
        """
        def __init__(self, dictionary):
            # keyword -> (finder for env[var], finder for env['ENV'][var])
            self.dictionary = {
                keyword: (SCons.Scanner.FindPathDirs(variable),
                          FindENVPathDirs(variable))
                for keyword, variable in dictionary.items()
            }

        def __call__(self, env, dir=None, target=None, source=None,
                     argument=None):
            resolved = {}
            for keyword, finders in self.dictionary.items():
                find_dirs, find_env_dirs = finders
                # The finders are deliberately called with None for
                # dir/target/source/argument, whatever this call received.
                resolved[keyword] = (
                    find_dirs(env, dir=None, target=None, source=None,
                              argument=None),
                    find_env_dirs(env, dir=None, target=None, source=None,
                                  argument=None),
                )
            return tuple(resolved.items())

    class LaTeXScanCheck(object):
        """Skip all but LaTeX source files, i.e., do not scan *.eps,
        *.pdf, *.jpg, etc.
        """
        def __init__(self, suffixes):
            self.suffixes = suffixes

        def __call__(self, node, env):
            is_current = not node.has_builder() or node.is_up_to_date()
            suffix = node.get_suffix()
            is_scannable = suffix in env.subst_list(self.suffixes)[0]
            return is_scannable and is_current

    kw.update({
        'function': _scan,
        'path_function': FindMultiPathDirs(LaTeX.keyword_paths),
        'recursive': 0,
        'skeys': suffixes,
        'scan_check': LaTeXScanCheck(suffixes),
        'name': name,
    })

    SCons.Scanner.Base.__init__(self, *args, **kw)
238
def _latex_names(self, include):
    """Return the candidate file names for one (keyword, filename) pair.

    "include" always gets '.tex' appended.  "input", "bibliography" and
    "usepackage" get '.tex', '.bib' and '.sty' respectively, but only when
    the file name has no extension.  An extensionless "includegraphics"
    name yields one candidate per entry of self.graphics_extensions.
    Everything else is returned unchanged as a one-element list.
    """
    keyword, filename = include[0], include[1]

    if keyword == 'include':
        return [filename + '.tex']

    # Suffix added to an extensionless argument of these keywords.
    fallback_suffix = {
        'input': '.tex',
        'bibliography': '.bib',
        'usepackage': '.sty',
    }
    if keyword in fallback_suffix or keyword == 'includegraphics':
        extension = os.path.splitext(filename)[1]
        if extension == "":
            if keyword == 'includegraphics':
                return [filename + suffix
                        for suffix in self.graphics_extensions]
            return [filename + fallback_suffix[keyword]]

    return [filename]
265
268
def find_include(self, include, source_dir, path):
    """Locate the file named by one (keyword, filename) include.

    include     -- (keyword, filename) tuple as produced by scan()
    source_dir  -- directory of the top-level file; always searched first
    path        -- mapping from keyword to a pair of directory tuples,
                   (dirs from env[var], dirs from env['ENV'][var])

    Returns (node, include) for the first candidate name that is found,
    or (None, include) when nothing matches.
    """
    try:
        sub_paths = path[include[0]]
    except (IndexError, KeyError):
        # No search path registered for this keyword: fall back to two
        # empty directory tuples so that only source_dir is searched.
        # (An empty tuple here would make the indexing below fail.)
        sub_paths = ((), ())

    for name in self._latex_names(include):
        # see if we find it using the path in env[var]
        found = SCons.Node.FS.find_file(name, (source_dir,) + sub_paths[0])
        if found:
            return found, include
        # see if we find it using the path in env['ENV'][var]
        found = SCons.Node.FS.find_file(name, (source_dir,) + sub_paths[1])
        if found:
            return found, include

    # Nothing found, or there were no candidate names at all.
    return None, include
285
def canonical_text(self, text):
    """Standardize an input TeX-file contents.

    Currently:
      * removes comments, unwrapping comment-wrapped lines.

    Each line is cut at its first unescaped '%'.  When a line ended in a
    comment, the next line is glued onto it with its leading whitespace
    removed.  The result has trailing whitespace stripped and always ends
    with a single newline.
    """
    pieces = []
    joining = False
    for raw_line in text.splitlines():
        code, remark = self.comment_re.findall(raw_line)[0]
        if joining:
            pieces[-1] += code.lstrip()
        else:
            pieces.append(code)
        # A non-empty comment part means the next line continues this one.
        joining = bool(remark)
    return '\n'.join(pieces).rstrip() + '\n'
302
def scan(self, node):
    """Return the list of (keyword, filename) includes found in node.

    The result is cached on node.includes; a node whose includes
    attribute is already set is returned as-is without reading the file.

    Otherwise the file text is canonicalized (comments removed) and
    matched against self.cre.  The regular expression may return a
    comma separated list of file names in one argument, as can be the
    case with the bibliography keyword, so each argument is split on ','
    and one tuple is produced per name.  A trailing "[options]" captured
    with the keyword (e.g. for includegraphics) is removed.
    """
    if node.includes is not None:
        return node.includes

    # Strips optional whitespace plus "[...]" from the end of a keyword.
    noopt_cre = re.compile(r'\s*\[.*$')

    text = self.canonical_text(node.get_text_contents())
    includes = []
    for include in self.cre.findall(text):
        inc_type = noopt_cre.sub('', include[0])
        for inc_name in include[1].split(','):
            includes.append((inc_type, inc_name))

    node.includes = includes
    return includes
337
def scan_recurse(self, node, path=()):
    """Do a recursive scan of the top level target file.

    This lets us search for included files based on the
    directory of the main file just as latex does.

    node  -- the top-level file node; its directory is the first place
             every include is looked up
    path  -- iterable of (keyword, (dirs, env_dirs)) pairs as produced
             by the scanner's path function

    Returns the nodes of all includes that were found, ordered by
    self.sort_key.  Includes that cannot be found produce a
    DependencyWarning, except for 'usepackage' ones.
    """
    path_dict = dict(path)

    queue = list(self.scan(node))
    # File names (include[1]) that have already been handled.
    seen = set()

    # (sort key, node) pairs; the key is computed once per node and the
    # list is sorted on it at the end.
    found = []
    source_dir = node.get_dir()

    while queue:
        include = queue.pop()
        if include[1] in seen:
            continue
        seen.add(include[1])

        n, requested = self.find_include(include, source_dir, path_dict)
        if n is None:
            # Do not bother with 'usepackage' warnings, as they most
            # likely refer to system-level files
            if include[0] != 'usepackage':
                SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
                                    "No dependency generated for file: %s (included from: %s) -- file not found" % (requested, node))
            continue

        found.append((self.sort_key(n), n))
        # recurse down
        queue.extend(self.scan(n))

    # Sort on the key only: comparing whole tuples would fall back to
    # comparing node objects whenever two keys are equal.
    found.sort(key=lambda pair: pair[0])
    return [n for _, n in found]
385
386
387
388
389
390
391