1 """SCons.Scanner.LaTeX
2
3 This module implements the dependency scanner for LaTeX code.
4
5 """
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 __revision__ = "src/engine/SCons/Scanner/LaTeX.py e724ae812eb96f4858a132f5b8c769724744faf6 2019-07-21 00:04:47 bdeegan"
31
32 import os.path
33 import re
34
35 import SCons.Scanner
36 import SCons.Util
37
38
# Suffixes appended, in this order, to an extension-less \includegraphics
# argument by the scanner that is configured for plain latex builds.
TexGraphics = [
    '.eps',
    '.ps',
]

# Suffixes appended, in this order, to an extension-less \includegraphics
# argument by the scanner that is configured for pdflatex builds.
LatexGraphics = [
    '.png',
    '.jpg',
    '.gif',
    '.tif',
]
42
43
44
47 _null = _Null
48
49
50
51
52
53
79
81 """
82 A class to bind a specific E{*}PATH variable name to a function that
83 will return all of the E{*}path directories.
84 """
86 self.variable = variable
87 - def __call__(self, env, dir=None, target=None, source=None, argument=None):
97
98
99
101 """
102 Return a prototype Scanner instance for scanning LaTeX source files
103 when built with latex.
104 """
105 ds = LaTeX(name = "LaTeXScanner",
106 suffixes = '$LATEXSUFFIXES',
107
108 graphics_extensions = TexGraphics,
109 recursive = 0)
110 return ds
111
113 """
114 Return a prototype Scanner instance for scanning LaTeX source files
115 when built with pdflatex.
116 """
117 ds = LaTeX(name = "PDFLaTeXScanner",
118 suffixes = '$LATEXSUFFIXES',
119
120 graphics_extensions = LatexGraphics,
121 recursive = 0)
122 return ds
123
124 -class LaTeX(SCons.Scanner.Base):
125 """
126 Class for scanning LaTeX files for included files.
127
128 Unlike most scanners, which use regular expressions that just
129 return the included file name, this returns a tuple consisting
130 of the keyword for the inclusion ("include", "includegraphics",
131 "input", or "bibliography"), and then the file name itself.
132 Based on a quick look at LaTeX documentation, it seems that we
133 should append .tex suffix for the "include" keywords, append .tex if
134 there is no extension for the "input" keyword, and need to add .bib
135 for the "bibliography" keyword that does not accept extensions by itself.
136
137 Finally, if there is no extension for an "includegraphics" keyword
138 latex will append .ps or .eps to find the file, while pdftex may use .pdf,
139 .jpg, .tif, .mps, or .png.
140
141 The actual subset and search order may be altered by
142 DeclareGraphicsExtensions command. This complication is ignored.
143 The default order corresponds to experimentation with teTeX::
144
145 $ latex --version
146 pdfeTeX 3.141592-1.21a-2.2 (Web2C 7.5.4)
147 kpathsea version 3.5.4
148
149 The order is:
150 ['.eps', '.ps'] for latex
151 ['.png', '.pdf', '.jpg', '.tif'] for pdflatex.
152
153 Another difference is that the search path is determined by the type
154 of the file being searched:
155 env['TEXINPUTS'] for "input" and "include" keywords
156 env['TEXINPUTS'] for "includegraphics" keyword
157 env['TEXINPUTS'] for "lstinputlisting" keyword
158 env['BIBINPUTS'] for "bibliography" keyword
159 env['BSTINPUTS'] for "bibliographystyle" keyword
160 env['INDEXSTYLE'] for "makeindex" keyword (no scanning support is needed; the entry just allows the user to set the variable).
161
162 FIXME: also look for the class or style in document[class|style]{}
163 FIXME: also look for the argument of bibliographystyle{}
164 """
165 keyword_paths = {'include': 'TEXINPUTS',
166 'input': 'TEXINPUTS',
167 'includegraphics': 'TEXINPUTS',
168 'bibliography': 'BIBINPUTS',
169 'bibliographystyle': 'BSTINPUTS',
170 'addbibresource': 'BIBINPUTS',
171 'addglobalbib': 'BIBINPUTS',
172 'addsectionbib': 'BIBINPUTS',
173 'makeindex': 'INDEXSTYLE',
174 'usepackage': 'TEXINPUTS',
175 'lstinputlisting': 'TEXINPUTS'}
176 env_variables = SCons.Util.unique(list(keyword_paths.values()))
177 two_arg_commands = ['import', 'subimport',
178 'includefrom', 'subincludefrom',
179 'inputfrom', 'subinputfrom']
180
def __init__(self, name, suffixes, graphics_extensions, *args, **kw):
    """
    Compile the include-matching patterns and initialise the base scanner.

    name -- scanner name, handed to SCons.Scanner.Base as 'name'.
    suffixes -- handed to the base scanner as 'skeys' and also used by
        the scan check to decide whether a node is scanned at all.
    graphics_extensions -- stored on the instance as the list of
        suffixes tried for an extension-less includegraphics argument.
    *args, **kw -- forwarded to SCons.Scanner.Base.__init__.  The
        'function', 'path_function', 'recursive', 'skeys', 'scan_check'
        and 'name' entries of kw are always overwritten here.
    """
    # Group 1 is the command (possibly with a trailing [options] part),
    # group 2 the first brace argument, group 3 an optional second one.
    include_pattern = r'''
        \\(
            include
          | includegraphics(?:\s*\[[^\]]+\])?
          | lstinputlisting(?:\[[^\]]+\])?
          | input
          | import
          | subimport
          | includefrom
          | subincludefrom
          | inputfrom
          | subinputfrom
          | bibliography
          | addbibresource
          | addglobalbib
          | addsectionbib
          | usepackage
          )
              \s*{([^}]*)}       # first arg
          (?: \s*{([^}]*)} )?    # maybe another arg
    '''
    self.cre = re.compile(include_pattern, re.M | re.X)
    # Group 1: the text up to the first unescaped '%'; group 2: the rest.
    self.comment_re = re.compile(r'^((?:(?:\\%)|[^%\n])*)(.*)$', re.M)

    self.graphics_extensions = graphics_extensions

    def _scan(node, env, path=(), self=self):
        # Scan the resolved file; a file that does not exist has no includes.
        resolved = node.rfile()
        if resolved.exists():
            return self.scan_recurse(resolved, path)
        return []

    class FindMultiPathDirs(object):
        """Resolve one pair of search paths per LaTeX keyword.

        For every keyword of the given dictionary a
        SCons.Scanner.FindPathDirs and a FindENVPathDirs instance are
        created for the associated variable name.  Calling the object
        evaluates all of them and returns the results as a tuple of
        (keyword, (paths, env_paths)) items, which is hashable where a
        dictionary would not be; the scan function turns it back into a
        dictionary.
        """
        def __init__(self, dictionary):
            self.dictionary = {
                keyword: (SCons.Scanner.FindPathDirs(variable),
                          FindENVPathDirs(variable))
                for keyword, variable in dictionary.items()
            }

        def __call__(self, env, dir=None, target=None, source=None,
                     argument=None):
            # The finders are always invoked with None for dir, target,
            # source and argument, whatever this method itself received.
            resolved = {
                keyword: (finder(env, dir=None, target=None, source=None,
                                 argument=None),
                          env_finder(env, dir=None, target=None, source=None,
                                     argument=None))
                for keyword, (finder, env_finder) in self.dictionary.items()
            }
            return tuple(resolved.items())

    class LaTeXScanCheck(object):
        """Let only LaTeX source files through to the scanner, i.e.,
        do not scan *.eps, *.pdf, *.jpg, etc.
        """
        def __init__(self, suffixes):
            self.suffixes = suffixes

        def __call__(self, node, env):
            # Both conditions are always evaluated, in this order.
            up_to_date = not node.has_builder() or node.is_up_to_date()
            has_latex_suffix = (node.get_suffix()
                                in env.subst_list(self.suffixes)[0])
            return has_latex_suffix and up_to_date

    # recursive is forced to 0 here regardless of what the caller passed.
    kw.update(
        function=_scan,
        path_function=FindMultiPathDirs(LaTeX.keyword_paths),
        recursive=0,
        skeys=suffixes,
        scan_check=LaTeXScanCheck(suffixes),
        name=name,
    )

    SCons.Scanner.Base.__init__(self, *args, **kw)
262
def _latex_names(self, include_type, filename):
    """
    Return the list of file names to try for one include.

    include_type -- the LaTeX keyword, e.g. 'input' or 'includegraphics'.
    filename -- the file name exactly as written in the document.

    A name that already has an extension, or a keyword that is not
    handled here, yields just [filename].  Otherwise the keyword's
    default suffix is appended; for 'includegraphics' one candidate per
    entry of self.graphics_extensions is returned, in that order.
    """
    tex_keywords = ('input', 'include', 'import', 'subimport',
                    'includefrom', 'subincludefrom',
                    'inputfrom', 'subinputfrom')
    default_suffix = dict.fromkeys(tex_keywords, '.tex')
    default_suffix['bibliography'] = '.bib'
    default_suffix['usepackage'] = '.sty'

    is_graphics = include_type == 'includegraphics'
    if not is_graphics and include_type not in default_suffix:
        return [filename]

    # An explicit extension is always taken as written.
    if os.path.splitext(filename)[1] != "":
        return [filename]

    if is_graphics:
        return [filename + suffix for suffix in self.graphics_extensions]
    return [filename + default_suffix[include_type]]
292
295
def find_include(self, include, source_dir, path):
    """
    Look up the file named by one include entry.

    include -- an (inc_type, inc_subdir, inc_filename) triple.
    source_dir -- directory node that is searched before any other path.
    path -- container indexed by include type that gives the extra
        search-path tuples for that type; a type it does not know gets
        two empty paths.

    Returns (node, include) for the first candidate name that is found,
    or (None, include) when none of the candidates exists.
    """
    inc_type, inc_subdir, inc_filename = include

    try:
        extra_paths = path[inc_type]
    except (IndexError, KeyError):
        extra_paths = ((), ())

    candidates = self._latex_names(inc_type, inc_filename)

    # The directory of the source file always comes first.
    search_order = [(source_dir,)]
    search_order.extend(extra_paths)

    for candidate in candidates:
        for directories in search_order:
            # Every directory is re-rooted at the include's subdirectory.
            rooted = tuple(d.Dir(inc_subdir) for d in directories)
            found = SCons.Node.FS.find_file(candidate, rooted)
            if found:
                return found, include
    return None, include
317
def canonical_text(self, text):
    """Return the contents of a TeX file in a standard form.

    Currently:
      * comments are removed, and a line that follows a commented line
        is joined onto it with its leading whitespace stripped.

    The result has trailing whitespace removed and ends with exactly
    one newline.
    """
    kept = []
    previous_had_comment = False
    for raw_line in text.splitlines():
        # comment_re splits a line into its code part and, if present,
        # the comment that starts at the first unescaped '%'.
        code, comment = self.comment_re.match(raw_line).groups()
        if previous_had_comment:
            kept[-1] += code.lstrip()
        else:
            kept.append(code)
        previous_had_comment = bool(comment)
    return '\n'.join(kept).rstrip() + '\n'
334
def scan(self, node, subdir='.'):
    """
    Return the include entries of a single node.

    node -- the node to scan.  Its 'includes' attribute is used as a
        cache: when it is not None it is returned unchanged, otherwise
        it is set to the freshly computed list.
    subdir -- directory recorded with every entry; for the two-argument
        commands the first argument is joined onto it.

    Every entry is an (inc_type, inc_subdir, inc_filename) triple.
    """
    cached = node.includes
    if cached is not None:
        return cached

    # Removes an option block such as "[width=3cm]", together with any
    # whitespace in front of it, from the matched command name.
    strip_options = re.compile(r'\s*\[.*$').sub

    source_text = self.canonical_text(node.get_text_contents())

    entries = []
    for match in self.cre.findall(source_text):
        keyword = strip_options('', match[0])
        if keyword in self.two_arg_commands:
            # First argument is a directory, second the file list.
            directory = os.path.join(subdir, match[1])
            file_list = match[2]
        else:
            directory = subdir
            file_list = match[1]
        # A comma-separated argument yields one entry per name.
        entries.extend((keyword, directory, file_name)
                       for file_name in file_list.split(','))

    node.includes = entries
    return entries
374
def scan_recurse(self, node, path=()):
    """ Scan the top-level file and every file it includes, recursively.

    Included files are always looked up starting from the directory of
    the top-level file, just as latex does.

    node -- the top-level node to scan.
    path -- iterable of (include type, search paths) pairs.

    Returns the nodes that were found, ordered by their sort key.
    """
    paths_by_type = dict(list(path))

    pending = list(self.scan(node))
    # File names already handled; each name is looked up at most once.
    visited = set()
    found = []
    top_dir = node.get_dir()

    while pending:
        entry = pending.pop()
        inc_type, inc_subdir, inc_filename = entry

        if inc_filename in visited:
            continue
        visited.add(inc_filename)

        included, entry_info = self.find_include(entry, top_dir, paths_by_type)
        if included is not None:
            found.append((self.sort_key(included), included))
            # Queue whatever the included file includes in turn.
            pending.extend(self.scan(included, inc_subdir))
        elif inc_type != 'usepackage':
            # A missing usepackage file is skipped without a warning.
            SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
                                "No dependency generated for file: %s (included from: %s) -- file not found" % (entry_info, node))

    return [included for _, included in sorted(found)]
424
425
426
427
428
429
430