1 """SCons.Scanner.LaTeX
2
3 This module implements the dependency scanner for LaTeX code.
4
5 """
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 __revision__ = "src/engine/SCons/Scanner/LaTeX.py 3a41ed6b288cee8d085373ad7fa02894e1903864 2019-01-23 17:30:35 bdeegan"
31
32 import os.path
33 import re
34
35 import SCons.Scanner
36 import SCons.Util
37
38
# Graphics file extensions handed to the scanner created for plain latex
# builds (passed as graphics_extensions together with name "LaTeXScanner").
TexGraphics = ['.eps', '.ps']

# Graphics file extensions handed to the scanner created for pdflatex
# builds (passed as graphics_extensions together with name "PDFLaTeXScanner").
LatexGraphics = [ '.png', '.jpg', '.gif', '.tif']



# NOTE(review): _Null is not defined in the visible part of this file and
# _null is not used in it either -- confirm both against the full module.
_null = _Null
48
49
50
51
52
53
79
81 """
82 A class to bind a specific E{*}PATH variable name to a function that
83 will return all of the E{*}path directories.
84 """
86 self.variable = variable
87 - def __call__(self, env, dir=None, target=None, source=None, argument=None):
97
98
def LaTeXScanner():
    """
    Return a prototype Scanner instance for scanning LaTeX source files
    when built with latex.

    The scanner is a LaTeX instance named "LaTeXScanner" that handles the
    suffixes in $LATEXSUFFIXES and tries the TexGraphics extensions for
    graphics file names given without an extension.
    """
    # NOTE(review): the "def" line was missing from the source under review;
    # the function name is taken from the scanner name below -- confirm.
    return LaTeX(name="LaTeXScanner",
                 suffixes='$LATEXSUFFIXES',
                 graphics_extensions=TexGraphics,
                 recursive=0)

def PDFLaTeXScanner():
    """
    Return a prototype Scanner instance for scanning LaTeX source files
    when built with pdflatex.

    The scanner is a LaTeX instance named "PDFLaTeXScanner" that handles
    the suffixes in $LATEXSUFFIXES and tries the LatexGraphics extensions
    for graphics file names given without an extension.
    """
    # NOTE(review): the "def" line was missing from the source under review;
    # the function name is taken from the scanner name below -- confirm.
    return LaTeX(name="PDFLaTeXScanner",
                 suffixes='$LATEXSUFFIXES',
                 graphics_extensions=LatexGraphics,
                 recursive=0)
123
class LaTeX(SCons.Scanner.Base):
    """
    Class for scanning LaTeX files for included files.

    Unlike most scanners, which use regular expressions that just
    return the included file name, scan() returns tuples consisting
    of the keyword for the inclusion ("include", "includegraphics",
    "input", "bibliography", ...), the directory the file name is
    relative to, and then the file name itself.

    When a file name is given without an extension, a default one is
    appended before searching: .tex for "input", "include" and the
    two-argument import commands, .bib for "bibliography" and .sty for
    "usepackage" (see _latex_names()).

    Finally, if there is no extension for an "includegraphics" keyword
    latex will append .ps or .eps to find the file, while pdftex may use .pdf,
    .jpg, .tif, .mps, or .png.

    The actual subset and search order may be altered by
    DeclareGraphicsExtensions command.  This complication is ignored.
    The default order corresponds to experimentation with teTeX::

        $ latex --version
        pdfeTeX 3.141592-1.21a-2.2 (Web2C 7.5.4)
        kpathsea version 3.5.4

    The order is:
        ['.eps', '.ps'] for latex
        ['.png', '.pdf', '.jpg', '.tif'].

    The extensions this scanner actually tries are the ones in the
    graphics_extensions list handed to the constructor.

    Another difference is that the search path is determined by the type
    of the file being searched (see keyword_paths):
        env['TEXINPUTS'] for "input" and "include" keywords
        env['TEXINPUTS'] for "includegraphics" keyword
        env['TEXINPUTS'] for "lstinputlisting" keyword
        env['TEXINPUTS'] for "usepackage" keyword
        env['BIBINPUTS'] for "bibliography" and the "add*bib*" keywords
        env['BSTINPUTS'] for "bibliographystyle" keyword
        env['INDEXSTYLE'] for "makeindex" keyword, no scanning support
                          needed just allows user to set it if needed.

    FIXME: also look for the class or style in document[class|style]{}
    FIXME: also look for the argument of bibliographystyle{}
    """
    # Inclusion keyword -> name of the variable holding its search path.
    keyword_paths = {'include': 'TEXINPUTS',
                     'input': 'TEXINPUTS',
                     'includegraphics': 'TEXINPUTS',
                     'bibliography': 'BIBINPUTS',
                     'bibliographystyle': 'BSTINPUTS',
                     'addbibresource': 'BIBINPUTS',
                     'addglobalbib': 'BIBINPUTS',
                     'addsectionbib': 'BIBINPUTS',
                     'makeindex': 'INDEXSTYLE',
                     'usepackage': 'TEXINPUTS',
                     'lstinputlisting': 'TEXINPUTS'}
    # The distinct path variable names used above.
    env_variables = SCons.Util.unique(list(keyword_paths.values()))
    # Commands whose first argument is a directory and whose second
    # argument is the (comma-separated) file list.
    two_arg_commands = ['import', 'subimport',
                        'includefrom', 'subincludefrom',
                        'inputfrom', 'subinputfrom']

    # Suffix appended by _latex_names() to a file name without extension.
    _default_suffixes = {'input': '.tex',
                         'include': '.tex',
                         'import': '.tex',
                         'subimport': '.tex',
                         'includefrom': '.tex',
                         'subincludefrom': '.tex',
                         'inputfrom': '.tex',
                         'subinputfrom': '.tex',
                         'bibliography': '.bib',
                         'usepackage': '.sty'}

    # Strips an optional "[options]" block (and the whitespace before it)
    # from a matched keyword.  Compiled once instead of on every scan();
    # re.S lets it also strip options that span several lines.
    _noopt_cre = re.compile(r'\s*\[.*$', re.S)

    def __init__(self, name, suffixes, graphics_extensions, *args, **kw):
        """Compile the scanning patterns and initialise the base scanner.

        name -- scanner name, passed to the base class as 'name'.
        suffixes -- suffixes of the files to scan (may be a construction
            variable reference such as '$LATEXSUFFIXES'); passed to the
            base class as 'skeys' and used by the scan check.
        graphics_extensions -- extensions tried for an "includegraphics"
            file name that has no extension.
        *args, **kw -- forwarded to SCons.Scanner.Base.__init__.  The
            keys 'function', 'path_function', 'recursive', 'skeys',
            'scan_check' and 'name' of kw are overwritten here.
        """
        # The pattern is compiled with re.M, so "\n" has to be excluded
        # together with "%" in the leading part: otherwise "^" could
        # anchor on an earlier line and run across line breaks.
        # Group 1 is the keyword (possibly with "[options]" attached),
        # groups 2 and 3 are the first and the optional second argument.
        regex = r'''
            ^[^%\n]*
            \\(
                include
              | includegraphics(?:\s*\[[^\]]+\])?
              | lstinputlisting(?:\[[^\]]+\])?
              | input
              | import
              | subimport
              | includefrom
              | subincludefrom
              | inputfrom
              | subinputfrom
              | bibliography
              | addbibresource
              | addglobalbib
              | addsectionbib
              | usepackage
              )
                  \s*{([^}]*)}       # first arg
              (?: \s*{([^}]*)} )?    # maybe another arg
        '''
        self.cre = re.compile(regex, re.M | re.X)
        # Splits one line into (text before the first unescaped "%",
        # the comment including the "%").
        self.comment_re = re.compile(r'^((?:(?:\\%)|[^%\n])*)(.*)$', re.M)

        self.graphics_extensions = graphics_extensions

        def _scan(node, env, path=(), self=self):
            # Scan function handed to the base class: resolve the node
            # via rfile() and scan it unless the file does not exist.
            node = node.rfile()
            if not node.exists():
                return []
            return self.scan_recurse(node, path)

        class FindMultiPathDirs(object):
            """The stock FindPathDirs function has the wrong granularity:
            it is called once per target, while we need the path that depends
            on what kind of included files is being searched. This wrapper
            hides multiple instances of FindPathDirs, one per the LaTeX path
            variable in the environment.  When invoked, the function calculates
            and returns all the required paths as a dictionary (converted into
            a tuple to become hashable). Then the scan function converts it
            back and uses a dictionary of tuples rather than a single tuple
            of paths.
            """
            def __init__(self, dictionary):
                # keyword -> (finder for the variable itself,
                #             finder for the same variable in env['ENV'])
                self.dictionary = {}
                for keyword, variable in dictionary.items():
                    self.dictionary[keyword] = (
                        SCons.Scanner.FindPathDirs(variable),
                        FindENVPathDirs(variable))

            def __call__(self, env, dir=None, target=None, source=None,
                         argument=None):
                # The dir/target/source/argument values received here are
                # not forwarded; every finder is invoked with None for
                # them (behaviour of the original code, preserved).
                found = {}
                for keyword, (find_var, find_env) in self.dictionary.items():
                    found[keyword] = (
                        find_var(env, dir=None, target=None, source=None,
                                 argument=None),
                        find_env(env, dir=None, target=None, source=None,
                                 argument=None))
                # A tuple of items is hashable, unlike the dict itself.
                return tuple(found.items())

        class LaTeXScanCheck(object):
            """Skip all but LaTeX source files, i.e., do not scan *.eps,
            *.pdf, *.jpg, etc.  A node with a builder is additionally
            scanned only when it is up to date.
            """
            def __init__(self, suffixes):
                self.suffixes = suffixes

            def __call__(self, node, env):
                current = not node.has_builder() or node.is_up_to_date()
                scannable = node.get_suffix() in env.subst_list(self.suffixes)[0]
                return scannable and current

        kw['function'] = _scan
        kw['path_function'] = FindMultiPathDirs(LaTeX.keyword_paths)
        kw['recursive'] = 0
        kw['skeys'] = suffixes
        kw['scan_check'] = LaTeXScanCheck(suffixes)
        kw['name'] = name

        SCons.Scanner.Base.__init__(self, *args, **kw)

    def _latex_names(self, include_type, filename):
        """Return the candidate file names for one inclusion.

        include_type -- the inclusion keyword ("input", "usepackage", ...).
        filename -- the file name as written in the document.

        A name that already has an extension is returned unchanged, as a
        one-element list.  Otherwise "includegraphics" yields one
        candidate per entry of self.graphics_extensions, keywords listed
        in _default_suffixes get that suffix appended, and every other
        keyword yields the unchanged name.
        """
        if os.path.splitext(filename)[1] == "":
            if include_type == 'includegraphics':
                return [filename + ext for ext in self.graphics_extensions]
            suffix = self._default_suffixes.get(include_type)
            if suffix is not None:
                return [filename + suffix]
        return [filename]

    def find_include(self, include, source_dir, path):
        """Locate the file named by one inclusion.

        include -- (keyword, subdirectory, file name) tuple from scan().
        source_dir -- directory node of the top-level file being scanned.
        path -- mapping from keyword to a pair of directory tuples, as
            built by scan_recurse() from the path function's result.

        Returns (node, include); node is None when nothing was found.
        """
        inc_type, inc_subdir, inc_filename = include
        try:
            sub_paths = path[inc_type]
        except (IndexError, KeyError):
            # Keywords without an entry (e.g. the two-argument commands,
            # which keyword_paths does not list) get no extra directories.
            sub_paths = ((), ())
        try_names = self._latex_names(inc_type, inc_filename)

        # Search order: the directory of the top-level file first, then
        # the two directory tuples registered for this keyword.
        search_paths = [(source_dir,)] + list(sub_paths)

        for candidate in try_names:
            for search_path in search_paths:
                # The file is looked up relative to inc_subdir inside
                # each directory of the search path.
                dirs = tuple([d.Dir(inc_subdir) for d in search_path])
                found = SCons.Node.FS.find_file(candidate, dirs)
                if found:
                    return found, include
        return None, include

    def canonical_text(self, text):
        """Standardize an input TeX-file contents.

        Currently:
          * removes comments, unwrapping comment-wrapped lines.

        A line that ended in a comment is joined with the following
        line, whose leading whitespace is dropped.  The result has
        trailing whitespace removed and ends with exactly one newline.
        """
        out = []
        line_continues_a_comment = False
        for raw_line in text.splitlines():
            code, comment = self.comment_re.findall(raw_line)[0]
            if line_continues_a_comment:
                out[-1] = out[-1] + code.lstrip()
            else:
                out.append(code)
            line_continues_a_comment = len(comment) > 0
        return '\n'.join(out).rstrip() + '\n'

    def scan(self, node, subdir='.'):
        """Return the inclusions found directly in node.

        node -- file node to scan; its 'includes' attribute is used as a
            cache and is filled in by this method.
        subdir -- directory that file names found in node are relative to.

        Returns a list of (keyword, subdirectory, file name) tuples, one
        per file of every comma-separated file list.

        Note that a cached node.includes is returned as-is, whatever
        subdir is passed on that call.
        """
        if node.includes is not None:
            return node.includes

        text = self.canonical_text(node.get_text_contents())

        includes = []
        for match in self.cre.findall(text):
            # Drop "[options]" that the pattern captured with the keyword.
            inc_type = self._noopt_cre.sub('', match[0])
            if inc_type in self.two_arg_commands:
                # First argument is a directory, second the file list.
                inc_subdir = os.path.join(subdir, match[1])
                file_list = match[2]
            else:
                inc_subdir = subdir
                file_list = match[1]
            for filename in file_list.split(','):
                includes.append((inc_type, inc_subdir, filename))

        node.includes = includes
        return includes

    def scan_recurse(self, node, path=()):
        """ do a recursive scan of the top level target file
        This lets us search for included files based on the
        directory of the main file just as latex does

        node -- top-level file node.
        path -- result of the path function: an iterable of
            (keyword, (directories, directories)) items.

        Returns the nodes of all files found, ordered by sort key.
        """
        path_dict = dict(list(path))

        queue = []
        queue.extend(self.scan(node))
        # File names already handled; keyed by the name as written in
        # the document, so a name is looked up only once per scan.
        seen = set()

        # Decorate-sort-undecorate: collect (sort key, node) pairs and
        # strip the keys after sorting.
        nodes = []
        source_dir = node.get_dir()

        while queue:
            include = queue.pop()
            inc_type, inc_subdir, inc_filename = include

            if inc_filename in seen:
                continue
            seen.add(inc_filename)

            n, i = self.find_include(include, source_dir, path_dict)
            if n is None:
                # No warning for 'usepackage' inclusions that are not found.
                if inc_type != 'usepackage':
                    SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
                                        "No dependency generated for file: %s (included from: %s) -- file not found" % (i, node))
            else:
                # NOTE(review): sort_key() is not defined in the part of
                # this class that was visible for review -- confirm it is
                # provided by the base class or restore its definition.
                sortkey = self.sort_key(n)
                nodes.append((sortkey, n))
                # Recurse into the file that was just found.
                queue.extend(self.scan(n, inc_subdir))

        return [pair[1] for pair in sorted(nodes)]
432
433
434
435
436
437
438