SCons.Scanner.LaTeX

1 """SCons.Scanner.LaTeX 2 3 This module implements the dependency scanner for LaTeX code. 4 5 """ 6 7 # 8 # Copyright (c) 2001 - 2019 The SCons Foundation 9 # 10 # Permission is hereby granted, free of charge, to any person obtaining 11 # a copy of this software and associated documentation files (the 12 # "Software"), to deal in the Software without restriction, including 13 # without limitation the rights to use, copy, modify, merge, publish, 14 # distribute, sublicense, and/or sell copies of the Software, and to 15 # permit persons to whom the Software is furnished to do so, subject to 16 # the following conditions: 17 # 18 # The above copyright notice and this permission notice shall be included 19 # in all copies or substantial portions of the Software. 20 # 21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 22 # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 23 # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 25 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 26 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 27 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 # 29 30 __revision__ = "src/engine/SCons/Scanner/LaTeX.py e724ae812eb96f4858a132f5b8c769724744faf6 2019-07-21 00:04:47 bdeegan" 31 32 import os.path 33 import re 34 35 import SCons.Scanner 36 import SCons.Util 37 38 # list of graphics file extensions for TeX and LaTeX 39 TexGraphics = ['.eps', '.ps'] 40 #LatexGraphics = ['.pdf', '.png', '.jpg', '.gif', '.tif'] 41 LatexGraphics = [ '.png', '.jpg', '.gif', '.tif'] 42 43 44 # Used as a return value of modify_env_var if the variable is not set.

class _Null(object):
    """Sentinel type meaning "env['ENV'][var] was not set".

    modify_env_var() returns the module-level ``_null`` alias when the
    variable it was asked to modify had no previous value.
    """
    pass

# Note that the sentinel is the class object itself, not an instance of it.
_null = _Null

# The user specifies the paths in env[variable], similar to other builders.
# They may be relative and must be converted to absolute, as expected
# by LaTeX and Co.  The environment may already have some paths in
# env['ENV'][var].  These paths are honored, but the env[var] paths have
# higher precedence.  All changes are un-done on exit.

def modify_env_var(env, var, abspath):
    """Prepend search directories to ``env['ENV'][var]``.

    ``abspath`` is prepended first; then, if ``env[var]`` is set, each of
    its entries is made absolute and prepended as well, so the ``env[var]``
    entries end up ahead of ``abspath`` and of anything already present.
    The result is stored as a single string ending with ``os.pathsep``.

    Returns the value ``env['ENV'][var]`` had on entry, or ``_null`` when
    it was not set.
    """
    try:
        previous = env['ENV'][var]
    except KeyError:
        previous = _null

    env.PrependENVPath(var, abspath)
    try:
        configured = env[var]
        if SCons.Util.is_List(configured):
            absolute = [os.path.abspath(str(entry)) for entry in configured]
        else:
            # A plain string may hold several directories: split it at
            # os.pathsep so that each one can be made absolute.
            absolute = [os.path.abspath(entry)
                        for entry in str(configured).split(os.pathsep)]
        env.PrependENVPath(var, absolute)
    except KeyError:
        # env[var] is not set: only abspath and the existing entries remain.
        pass

    # Build the final string by hand: env.AppendENVPath(var, os.pathsep)
    # refuses to append a bare separator, and without the trailing
    # separator the system paths would not be searched as well.
    exec_env = env['ENV']
    value = exec_env[var]
    if SCons.Util.is_List(value):
        value = os.pathsep.join(value)
    # The separator is appended unconditionally, which also covers the
    # case where env[var] was not set.
    exec_env[var] = value + os.pathsep

    return previous

79

class FindENVPathDirs(object):
    """Callable bound to the name of one ``*PATH`` variable.

    Calling the instance looks the variable up in ``env['ENV']`` and
    returns the directories found for it as a tuple.
    """

    def __init__(self, variable):
        """Remember which ``env['ENV']`` variable this instance reads."""
        self.variable = variable

    def __call__(self, env, dir=None, target=None, source=None, argument=None):
        """Return the directories listed in ``env['ENV'][self.variable]``.

        An empty tuple is returned when the variable (or ``env['ENV']``)
        is missing.  ``dir`` defaults to ``env.fs._cwd``; ``argument`` is
        accepted but not used.
        """
        import SCons.PathList

        try:
            raw_path = env['ENV'][self.variable]
        except KeyError:
            return ()

        start_dir = dir if dir else env.fs._cwd
        expanded = SCons.PathList.PathList(raw_path).subst_path(env, target, source)
        return tuple(start_dir.Rfindalldirs(expanded))

97 98 99

def LaTeXScanner():
    """Build the prototype Scanner used for LaTeX sources built with latex.

    The graphics extensions are those of ``TexGraphics``, in search order
    (see the LaTeX class docstring).
    """
    options = dict(
        name="LaTeXScanner",
        suffixes='$LATEXSUFFIXES',
        graphics_extensions=TexGraphics,
        recursive=0,
    )
    return LaTeX(**options)

111

def PDFLaTeXScanner():
    """Build the prototype Scanner used for LaTeX sources built with pdflatex.

    The graphics extensions are those of ``LatexGraphics``, in search order
    (see the LaTeX class docstring).
    """
    options = dict(
        name="PDFLaTeXScanner",
        suffixes='$LATEXSUFFIXES',
        graphics_extensions=LatexGraphics,
        recursive=0,
    )
    return LaTeX(**options)

123

class LaTeX(SCons.Scanner.Base):
    """Class for scanning LaTeX files for included files.

    Unlike most scanners, which use regular expressions that just
    return the included file name, scan() returns a list of tuples
    ``(keyword, subdir, filename)``: the keyword of the inclusion
    ("include", "includegraphics", "input", "bibliography", ...), the
    directory the name is relative to, and the file name itself.

    When the file name has no extension, _latex_names() appends .tex for
    the "input", "include" and import-package keywords, .bib for the
    "bibliography" keyword and .sty for the "usepackage" keyword.

    Finally, if there is no extension for an "includegraphics" keyword
    latex will append .ps or .eps to find the file, while pdftex may use
    .pdf, .jpg, .tif, .mps, or .png.

    The actual subset and search order may be altered by the
    DeclareGraphicsExtensions command.  This complication is ignored.
    The default order corresponds to experimentation with teTeX::

        $ latex --version
        pdfeTeX 3.141592-1.21a-2.2 (Web2C 7.5.4)
        kpathsea version 3.5.4

    The order actually used is the one given by the module-level lists:
        TexGraphics   = ['.eps', '.ps']                    for latex
        LatexGraphics = ['.png', '.jpg', '.gif', '.tif']   for pdflatex
    ('.pdf' is left out of LatexGraphics on purpose; see the note in
    _latex_names.)

    Another difference is that the search path is determined by the type
    of the file being searched:
        env['TEXINPUTS'] for "input" and "include" keywords
        env['TEXINPUTS'] for "includegraphics" keyword
        env['TEXINPUTS'] for "lstinputlisting" keyword
        env['BIBINPUTS'] for "bibliography" keyword
        env['BSTINPUTS'] for "bibliographystyle" keyword
        env['INDEXSTYLE'] for "makeindex" keyword, no scanning support
                          needed, just allows user to set it if needed.

    FIXME: also look for the class or style in document[class|style]{}
    FIXME: also look for the argument of bibliographystyle{}
    """
    keyword_paths = {'include': 'TEXINPUTS',
                     'input': 'TEXINPUTS',
                     'includegraphics': 'TEXINPUTS',
                     'bibliography': 'BIBINPUTS',
                     'bibliographystyle': 'BSTINPUTS',
                     'addbibresource': 'BIBINPUTS',
                     'addglobalbib': 'BIBINPUTS',
                     'addsectionbib': 'BIBINPUTS',
                     'makeindex': 'INDEXSTYLE',
                     'usepackage': 'TEXINPUTS',
                     'lstinputlisting': 'TEXINPUTS'}
    env_variables = SCons.Util.unique(list(keyword_paths.values()))
    two_arg_commands = ['import', 'subimport',
                        'includefrom', 'subincludefrom',
                        'inputfrom', 'subinputfrom']

    # Strips an option list (and any whitespace before the '[') from a
    # matched keyword.  DOTALL is required: the option list may span
    # several lines, and without it neither '.' nor '$' gets past the
    # first newline, leaving the options glued to the keyword.
    _noopt_cre = re.compile(r'\s*\[.*$', re.S)

    # Suffix appended to an extension-less file name, per keyword.
    # "includegraphics" is handled separately in _latex_names() because it
    # expands to one candidate per graphics extension.
    _default_suffixes = {
        'input': '.tex',
        'include': '.tex',
        'import': '.tex',
        'subimport': '.tex',
        'includefrom': '.tex',
        'subincludefrom': '.tex',
        'inputfrom': '.tex',
        'subinputfrom': '.tex',
        'bibliography': '.bib',
        'usepackage': '.sty',
    }

    def __init__(self, name, suffixes, graphics_extensions, *args, **kw):
        """Set up the regular expressions and the SCons.Scanner.Base hooks.

        name                -- scanner name, passed on as kw['name']
        suffixes            -- scanner keys; also used by the scan check
        graphics_extensions -- extensions tried, in order, for an
                               "includegraphics" file without extension
        *args, **kw         -- forwarded to SCons.Scanner.Base.__init__;
                               'function', 'path_function', 'recursive',
                               'skeys', 'scan_check' and 'name' in kw are
                               overwritten here.
        """
        regex = r'''
            \\(
                include
              | includegraphics(?:\s*\[[^\]]+\])?
              | lstinputlisting(?:\[[^\]]+\])?
              | input
              | import
              | subimport
              | includefrom
              | subincludefrom
              | inputfrom
              | subinputfrom
              | bibliography
              | addbibresource
              | addglobalbib
              | addsectionbib
              | usepackage
              )
                  \s*{([^}]*)}       # first arg
              (?: \s*{([^}]*)} )?    # maybe another arg
        '''
        self.cre = re.compile(regex, re.M | re.X)
        # Group 1: the line up to the first unescaped '%';
        # group 2: the comment (empty when the line has none).
        self.comment_re = re.compile(r'^((?:(?:\\%)|[^%\n])*)(.*)$', re.M)

        self.graphics_extensions = graphics_extensions

        def _scan(node, env, path=(), self=self):
            node = node.rfile()
            if not node.exists():
                return []
            return self.scan_recurse(node, path)

        class FindMultiPathDirs(object):
            """The stock FindPathDirs function has the wrong granularity:
            it is called once per target, while we need the path that depends
            on what kind of included files is being searched. This wrapper
            hides multiple instances of FindPathDirs, one per the LaTeX path
            variable in the environment. When invoked, the function calculates
            and returns all the required paths as a dictionary (converted into
            a tuple to become hashable). Then the scan function converts it
            back and uses a dictionary of tuples rather than a single tuple
            of paths.
            """
            def __init__(self, dictionary):
                self.dictionary = {}
                for k, n in dictionary.items():
                    self.dictionary[k] = (SCons.Scanner.FindPathDirs(n),
                                          FindENVPathDirs(n))

            def __call__(self, env, dir=None, target=None, source=None,
                         argument=None):
                # NOTE(review): dir/target/source/argument are accepted but
                # not forwarded -- every lookup below is made with None.
                # Left unchanged; confirm whether this is intentional.
                di = {}
                for k, (c, cENV) in self.dictionary.items():
                    di[k] = (c(env, dir=None, target=None, source=None,
                               argument=None),
                             cENV(env, dir=None, target=None, source=None,
                                  argument=None))
                # To prevent "dict is not hashable error"
                return tuple(di.items())

        class LaTeXScanCheck(object):
            """Skip all but LaTeX source files, i.e., do not scan *.eps,
            *.pdf, *.jpg, etc.
            """
            def __init__(self, suffixes):
                self.suffixes = suffixes

            def __call__(self, node, env):
                current = not node.has_builder() or node.is_up_to_date()
                scannable = node.get_suffix() in env.subst_list(self.suffixes)[0]
                # Returning false means that the file is not scanned.
                return scannable and current

        kw['function'] = _scan
        kw['path_function'] = FindMultiPathDirs(LaTeX.keyword_paths)
        kw['recursive'] = 0
        kw['skeys'] = suffixes
        kw['scan_check'] = LaTeXScanCheck(suffixes)
        kw['name'] = name

        SCons.Scanner.Base.__init__(self, *args, **kw)

    def _latex_names(self, include_type, filename):
        """Return the candidate file names for one include.

        A name that already has an extension is returned unchanged, as is
        a name whose keyword has no default suffix.  Otherwise the default
        suffix of the keyword is appended; "includegraphics" yields one
        candidate per entry of self.graphics_extensions, in order.
        """
        if os.path.splitext(filename)[1] != "":
            return [filename]
        if include_type == 'includegraphics':
            # self.graphics_extensions + TexGraphics could be used here to
            # find dependencies for the PDF builder when only an .eps
            # figure is present.  Since the figure will be found if the
            # user tells scons how to make the pdf figure, that is left
            # out for now.
            return [filename + e for e in self.graphics_extensions]
        suffix = self._default_suffixes.get(include_type)
        if suffix is None:
            return [filename]
        return [filename + suffix]

    def sort_key(self, include):
        """Return the case-normalized string form of include, for sorting."""
        # Imported explicitly: the module-level imports only bring in
        # SCons.Scanner and SCons.Util.
        import SCons.Node.FS
        return SCons.Node.FS._my_normcase(str(include))

    def find_include(self, include, source_dir, path):
        """Locate the file named by one include.

        include    -- (keyword, subdir, filename) tuple as built by scan()
        source_dir -- directory node of the top-level file being scanned
        path       -- mapping keyword -> (env[var] dirs, env['ENV'][var] dirs)

        Returns (node, include); node is None when nothing was found.
        """
        # Imported explicitly: the module-level imports only bring in
        # SCons.Scanner and SCons.Util.
        import SCons.Node.FS

        inc_type, inc_subdir, inc_filename = include
        try:
            sub_paths = path[inc_type]
        except (IndexError, KeyError):
            sub_paths = ((), ())
        try_names = self._latex_names(inc_type, inc_filename)

        # There are three search paths to try:
        #  1. current directory "source_dir"
        #  2. env[var]
        #  3. env['ENV'][var]
        search_paths = [(source_dir,)] + list(sub_paths)

        for n in try_names:
            for search_path in search_paths:
                paths = tuple([d.Dir(inc_subdir) for d in search_path])
                i = SCons.Node.FS.find_file(n, paths)
                if i:
                    return i, include
        return None, include

    def canonical_text(self, text):
        """Standardize an input TeX-file contents.

        Currently:
          * removes comments, unwrapping comment-wrapped lines.
        """
        out = []
        line_continues_a_comment = False
        for line in text.splitlines():
            line, comment = self.comment_re.findall(line)[0]
            if line_continues_a_comment:
                # The previous line ended in a comment, which in TeX also
                # swallows the newline: glue this line onto it.
                out[-1] = out[-1] + line.lstrip()
            else:
                out.append(line)
            line_continues_a_comment = len(comment) > 0
        return '\n'.join(out).rstrip() + '\n'

    def scan(self, node, subdir='.'):
        """Return the includes of node as (keyword, subdir, filename) tuples.

        The result is cached in node.includes, so each node is parsed only
        once; a cached list is returned as is, whatever subdir is passed.
        """
        if node.includes is not None:
            return node.includes

        text = self.canonical_text(node.get_text_contents())
        # 1. Split comma-separated lines, e.g.
        #      ('bibliography', 'phys,comp')
        #    should become two entries
        #      ('bibliography', 'phys')
        #      ('bibliography', 'comp')
        # 2. Remove the options, e.g., such as
        #      ('includegraphics[clip,width=0.7\\linewidth]', 'picture.eps')
        #    should become
        #      ('includegraphics', 'picture.eps')
        split_includes = []
        for include in self.cre.findall(text):
            inc_type = self._noopt_cre.sub('', include[0])
            inc_subdir = subdir
            if inc_type in self.two_arg_commands:
                # First argument is a directory, second the file list.
                inc_subdir = os.path.join(subdir, include[1])
                inc_list = include[2].split(',')
            else:
                inc_list = include[1].split(',')
            for inc in inc_list:
                split_includes.append((inc_type, inc_subdir, inc))

        node.includes = split_includes
        return split_includes

    def scan_recurse(self, node, path=()):
        """Do a recursive scan of the top level target file.

        Included files are searched for relative to the directory of the
        main file, just as latex does.  path is the tuple of
        (keyword, paths) items produced by the scanner's path function.
        Returns the nodes that were found, ordered by sort_key().
        """
        # Imported explicitly: the module-level imports only bring in
        # SCons.Scanner and SCons.Util.
        import SCons.Warnings

        path_dict = dict(list(path))

        queue = []
        queue.extend(self.scan(node))
        seen = set()

        # This is a hand-coded DSU (decorate-sort-undecorate, or
        # Schwartzian transform) pattern.  The sort key is the raw name
        # of the file as specified on the \include, \input, etc. line.
        # TODO: what about the comment in the original Classic scanner:
        # """which lets
        # us keep the sort order constant regardless of whether the file
        # is actually found in a Repository or locally."""
        nodes = []
        source_dir = node.get_dir()
        while queue:
            include = queue.pop()
            inc_type, inc_subdir, inc_filename = include

            # Each file name is processed once, whatever keyword or
            # subdirectory it is referenced with.
            if inc_filename in seen:
                continue
            seen.add(inc_filename)

            n, i = self.find_include(include, source_dir, path_dict)
            if n is None:
                # Do not bother with 'usepackage' warnings, as they most
                # likely refer to system-level files
                if inc_type != 'usepackage':
                    SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
                                        "No dependency generated for file: %s (included from: %s) -- file not found" % (i, node))
            else:
                sortkey = self.sort_key(n)
                nodes.append((sortkey, n))
                # recurse down
                queue.extend(self.scan(n, inc_subdir))

        return [pair[1] for pair in sorted(nodes)]

424 425 # Local Variables: 426 # tab-width:4 427 # indent-tabs-mode:nil 428 # End: 429 # vim: set expandtab tabstop=4 shiftwidth=4: 430

Source Code for Module SCons.Scanner.LaTeX