# SCons.Scanner.LaTeX

"""SCons.Scanner.LaTeX

This module implements the dependency scanner for LaTeX code.

"""

#
# Copyright (c) 2001 - 2019 The SCons Foundation
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#

__revision__ = "src/engine/SCons/Scanner/LaTeX.py 3a41ed6b288cee8d085373ad7fa02894e1903864 2019-01-23 17:30:35 bdeegan"

import os.path
import re

import SCons.Node.FS
import SCons.Scanner
import SCons.Util
import SCons.Warnings

# list of graphics file extensions for TeX and LaTeX
TexGraphics = ['.eps', '.ps']
#LatexGraphics = ['.pdf', '.png', '.jpg', '.gif', '.tif']
LatexGraphics = [ '.png', '.jpg', '.gif', '.tif']


# Used as a return value of modify_env_var if the variable is not set.

45 -class _Null(object):

46 pass

47 _null = _Null 48 49 # The user specifies the paths in env[variable], similar to other builders. 50 # They may be relative and must be converted to absolute, as expected 51 # by LaTeX and Co. The environment may already have some paths in 52 # env['ENV'][var]. These paths are honored, but the env[var] paths have 53 # higher precedence. All changes are un-done on exit.

def modify_env_var(env, var, abspath):
    """Put the user's search paths in front of ``env['ENV'][var]``.

    ``abspath`` is prepended first, then every entry of ``env[var]`` (if
    that construction variable exists) converted to an absolute path.
    The resulting value is stored as a single string ending in
    ``os.pathsep``.

    Returns the value ``env['ENV'][var]`` had on entry, or ``_null`` when
    looking it up raised ``KeyError``.
    """
    try:
        previous = env['ENV'][var]
    except KeyError:
        previous = _null

    env.PrependENVPath(var, abspath)

    try:
        configured = env[var]
        if SCons.Util.is_List(configured):
            absolute = [os.path.abspath(str(entry)) for entry in configured]
        else:
            # Split at os.pathsep to convert into absolute path
            absolute = [os.path.abspath(entry)
                        for entry in str(configured).split(os.pathsep)]
        env.PrependENVPath(var, absolute)
    except KeyError:
        # No env[var] set by the user: only abspath was prepended.
        pass

    # Convert into a string explicitly to append ":" (without which it won't
    # search system paths as well). The problem is that
    # env.AppendENVPath(var, ":") does not work, refuses to append ":"
    # (os.pathsep).
    joined = env['ENV'][var]
    if SCons.Util.is_List(joined):
        joined = os.pathsep.join(joined)
    # The trailing os.pathsep is appended here so that the case with no
    # env[var] is covered as well.
    env['ENV'][var] = joined + os.pathsep

    return previous

class FindENVPathDirs(object):
    """
    Callable bound to the name of one search-path variable.

    Calling the instance looks the variable up in env['ENV'] and returns
    the directories it lists as a tuple; the tuple is empty when the
    lookup raises KeyError.
    """

    def __init__(self, variable):
        # Name of the env['ENV'] entry to resolve on each call.
        self.variable = variable

    def __call__(self, env, dir=None, target=None, source=None, argument=None):
        import SCons.PathList

        try:
            raw_path = env['ENV'][self.variable]
        except KeyError:
            return ()

        # Fall back to the environment's current directory when the caller
        # gives none.
        if not dir:
            dir = env.fs._cwd
        expanded = SCons.PathList.PathList(raw_path).subst_path(env, target, source)
        return tuple(dir.Rfindalldirs(expanded))

def LaTeXScanner():
    """
    Build the prototype Scanner used for LaTeX sources that are
    processed with latex.
    """
    return LaTeX(
        name="LaTeXScanner",
        suffixes='$LATEXSUFFIXES',
        # in the search order, see below in LaTeX class docstring
        graphics_extensions=TexGraphics,
        recursive=0,
    )

def PDFLaTeXScanner():
    """
    Build the prototype Scanner used for LaTeX sources that are
    processed with pdflatex.
    """
    return LaTeX(
        name="PDFLaTeXScanner",
        suffixes='$LATEXSUFFIXES',
        # in the search order, see below in LaTeX class docstring
        graphics_extensions=LatexGraphics,
        recursive=0,
    )

class LaTeX(SCons.Scanner.Base):
    """
    Class for scanning LaTeX files for included files.

    Unlike most scanners, which use regular expressions that just
    return the included file name, this returns a tuple consisting
    of the keyword for the inclusion ("include", "includegraphics",
    "input", or "bibliography"), and then the file name itself.
    Based on a quick look at LaTeX documentation, it seems that we
    should append .tex suffix for the "include" keywords, append .tex if
    there is no extension for the "input" keyword, and need to add .bib
    for the "bibliography" keyword that does not accept extensions by itself.

    Finally, if there is no extension for an "includegraphics" keyword
    latex will append .ps or .eps to find the file, while pdftex may use .pdf,
    .jpg, .tif, .mps, or .png.

    The actual subset and search order may be altered by
    DeclareGraphicsExtensions command. This complication is ignored.
    The default order corresponds to experimentation with teTeX::

        $ latex --version
        pdfeTeX 3.141592-1.21a-2.2 (Web2C 7.5.4)
        kpathsea version 3.5.4

    The order is:
        ['.eps', '.ps'] for latex
        ['.png', '.pdf', '.jpg', '.tif'] for pdflatex.

    The list actually tried by an instance is whatever was passed to the
    constructor as graphics_extensions.

    Another difference is that the search path is determined by the type
    of the file being searched:
    env['TEXINPUTS'] for "input" and "include" keywords
    env['TEXINPUTS'] for "includegraphics" keyword
    env['TEXINPUTS'] for "lstinputlisting" keyword
    env['BIBINPUTS'] for "bibliography" keyword
    env['BSTINPUTS'] for "bibliographystyle" keyword
    env['INDEXSTYLE'] for "makeindex" keyword, no scanning support needed
                      just allows user to set it if needed.

    FIXME: also look for the class or style in document[class|style]{}
    FIXME: also look for the argument of bibliographystyle{}
    """
    # Maps an inclusion keyword to the path variable searched for it.
    keyword_paths = {'include': 'TEXINPUTS',
                     'input': 'TEXINPUTS',
                     'includegraphics': 'TEXINPUTS',
                     'bibliography': 'BIBINPUTS',
                     'bibliographystyle': 'BSTINPUTS',
                     'addbibresource': 'BIBINPUTS',
                     'addglobalbib': 'BIBINPUTS',
                     'addsectionbib': 'BIBINPUTS',
                     'makeindex': 'INDEXSTYLE',
                     'usepackage': 'TEXINPUTS',
                     'lstinputlisting': 'TEXINPUTS'}
    env_variables = SCons.Util.unique(list(keyword_paths.values()))
    # Commands whose first brace group is a directory and whose second
    # brace group is the file name(s).
    two_arg_commands = ['import', 'subimport',
                        'includefrom', 'subincludefrom',
                        'inputfrom', 'subinputfrom']

    # Removes a trailing "[options]" part, and any whitespace before the
    # '[', from the keyword captured by the scanner regex.  Must be a raw
    # string: '\s' and '\[' are not valid escapes in a plain literal.
    _noopt_cre = re.compile(r'\s*\[.*$')

    # Suffix appended to a file name that has no extension, per keyword.
    # 'includegraphics' is absent on purpose: it expands to one candidate
    # per entry of self.graphics_extensions instead.
    _default_suffixes = {'input': '.tex',
                         'include': '.tex',
                         'import': '.tex',
                         'subimport': '.tex',
                         'includefrom': '.tex',
                         'subincludefrom': '.tex',
                         'inputfrom': '.tex',
                         'subinputfrom': '.tex',
                         'bibliography': '.bib',
                         'usepackage': '.sty'}

    def __init__(self, name, suffixes, graphics_extensions, *args, **kw):
        """Set up the regexes and hand the scan callbacks to the base class.

        name                -- scanner name, forwarded as kw['name']
        suffixes            -- source suffixes (may be an unexpanded
                               variable such as '$LATEXSUFFIXES'); used for
                               kw['skeys'] and for the scan check
        graphics_extensions -- extensions tried, in order, for an
                               includegraphics argument without extension

        Any 'function', 'path_function', 'recursive', 'skeys', 'scan_check'
        or 'name' entry supplied in kw is overwritten.
        """

        # We have to include \n with the % we exclude from the first
        # part of the regex because the expression is compiled with re.M.
        # Without the \n, the ^ could match the beginning of a *previous*
        # line followed by one or more newline characters (i.e. blank
        # lines), interfering with a match on the next line.
        # add option for whitespace before the '[options]' or the '{filename}'
        regex = r'''
            ^[^%\n]*
            \\(
                include
              | includegraphics(?:\s*\[[^\]]+\])?
              | lstinputlisting(?:\[[^\]]+\])?
              | input
              | import
              | subimport
              | includefrom
              | subincludefrom
              | inputfrom
              | subinputfrom
              | bibliography
              | addbibresource
              | addglobalbib
              | addsectionbib
              | usepackage
              )
                  \s*{([^}]*)}       # first arg
              (?: \s*{([^}]*)} )?    # maybe another arg
        '''
        self.cre = re.compile(regex, re.M | re.X)
        # Group 1: the line up to the first unescaped '%'; group 2: the rest.
        self.comment_re = re.compile(r'^((?:(?:\\%)|[^%\n])*)(.*)$', re.M)

        self.graphics_extensions = graphics_extensions

        def _scan(node, env, path=(), self=self):
            node = node.rfile()
            if not node.exists():
                return []
            return self.scan_recurse(node, path)

        class FindMultiPathDirs(object):
            """The stock FindPathDirs function has the wrong granularity:
            it is called once per target, while we need the path that depends
            on what kind of included files is being searched. This wrapper
            hides multiple instances of FindPathDirs, one per the LaTeX path
            variable in the environment. When invoked, the function calculates
            and returns all the required paths as a dictionary (converted into
            a tuple to become hashable). Then the scan function converts it
            back and uses a dictionary of tuples rather than a single tuple
            of paths.
            """
            def __init__(self, dictionary):
                self.dictionary = {}
                for k, n in dictionary.items():
                    self.dictionary[k] = (SCons.Scanner.FindPathDirs(n),
                                          FindENVPathDirs(n))

            def __call__(self, env, dir=None, target=None, source=None,
                         argument=None):
                # NOTE: the dir/target/source/argument values received here
                # are not forwarded; both finders are always called with None.
                di = {}
                for k, (c, cENV) in self.dictionary.items():
                    di[k] = (c(env, dir=None, target=None, source=None,
                               argument=None),
                             cENV(env, dir=None, target=None, source=None,
                                  argument=None))
                # To prevent "dict is not hashable error"
                return tuple(di.items())

        class LaTeXScanCheck(object):
            """Skip all but LaTeX source files, i.e., do not scan *.eps,
            *.pdf, *.jpg, etc.
            """
            def __init__(self, suffixes):
                self.suffixes = suffixes

            def __call__(self, node, env):
                current = not node.has_builder() or node.is_up_to_date()
                scannable = node.get_suffix() in env.subst_list(self.suffixes)[0]
                # Returning false means that the file is not scanned.
                return scannable and current

        kw['function'] = _scan
        kw['path_function'] = FindMultiPathDirs(LaTeX.keyword_paths)
        kw['recursive'] = 0
        kw['skeys'] = suffixes
        kw['scan_check'] = LaTeXScanCheck(suffixes)
        kw['name'] = name

        SCons.Scanner.Base.__init__(self, *args, **kw)

    def _latex_names(self, include_type, filename):
        """Return the candidate file names to look for, in search order.

        A name that already has an extension, or whose keyword has no
        default suffix, is returned unchanged as a one-element list.
        """
        if os.path.splitext(filename)[1] != "":
            return [filename]
        if include_type == 'includegraphics':
            #return [filename+e for e in self.graphics_extensions + TexGraphics]
            # use the line above to find dependencies for the PDF builder
            # when only an .eps figure is present.  Since it will be found
            # if the user tells scons how to make the pdf figure, leave
            # it out for now.
            return [filename + e for e in self.graphics_extensions]
        suffix = self._default_suffixes.get(include_type)
        if suffix is None:
            return [filename]
        return [filename + suffix]

    def sort_key(self, include):
        """Return the case-normalised string form of include, for sorting."""
        return SCons.Node.FS._my_normcase(str(include))

    def find_include(self, include, source_dir, path):
        """Locate the file named by one include entry.

        include    -- (keyword, subdirectory, file name) tuple
        source_dir -- directory node of the top-level file being scanned
        path       -- mapping from keyword to a pair of directory tuples

        Returns (node, include); node is None when nothing was found.
        """
        inc_type, inc_subdir, inc_filename = include
        try:
            sub_paths = path[inc_type]
        except (IndexError, KeyError):
            sub_paths = ((), ())
        try_names = self._latex_names(inc_type, inc_filename)

        # There are three search paths to try:
        #  1. current directory "source_dir"
        #  2. env[var]
        #  3. env['ENV'][var]
        search_paths = [(source_dir,)] + list(sub_paths)

        for n in try_names:
            for search_path in search_paths:
                paths = tuple([d.Dir(inc_subdir) for d in search_path])
                i = SCons.Node.FS.find_file(n, paths)
                if i:
                    return i, include
        return None, include

    def canonical_text(self, text):
        """Standardize an input TeX-file contents.

        Currently:
          * removes comments, unwrapping comment-wrapped lines.
        """
        out = []
        line_continues_a_comment = False
        for line in text.splitlines():
            line, comment = self.comment_re.findall(line)[0]
            if line_continues_a_comment:
                # The previous line ended in a comment: glue this line onto
                # it, dropping the leading whitespace.
                out[-1] = out[-1] + line.lstrip()
            else:
                out.append(line)
            line_continues_a_comment = len(comment) > 0
        return '\n'.join(out).rstrip() + '\n'

    def scan(self, node, subdir='.'):
        """Return the include entries found in node.

        Each entry is a (keyword, subdirectory, file name) tuple.  The
        result is cached in node.includes, so a node is only parsed once;
        a cached list is returned as-is, whatever subdir is passed.
        """
        # Modify the default scan function to allow for the regular
        # expression to return a comma separated list of file names
        # as can be the case with the bibliography keyword.

        # Cache the includes list in node so we only scan it once:
        if node.includes is not None:
            return node.includes

        text = self.canonical_text(node.get_text_contents())
        # 1. Split comma-separated lines, e.g.
        #      ('bibliography', 'phys,comp')
        #    should become two entries
        #      ('bibliography', 'phys')
        #      ('bibliography', 'comp')
        # 2. Remove the options, e.g., such as
        #      ('includegraphics[clip,width=0.7\\linewidth]', 'picture.eps')
        #    should become
        #      ('includegraphics', 'picture.eps')
        split_includes = []
        for include in self.cre.findall(text):
            inc_type = self._noopt_cre.sub('', include[0])
            inc_subdir = subdir
            if inc_type in self.two_arg_commands:
                # First brace group is the directory, second the file(s).
                inc_subdir = os.path.join(subdir, include[1])
                names = include[2]
            else:
                names = include[1]
            for inc_name in names.split(','):
                split_includes.append((inc_type, inc_subdir, inc_name))

        node.includes = split_includes
        return split_includes

    def scan_recurse(self, node, path=()):
        """ do a recursive scan of the top level target file
        This lets us search for included files based on the
        directory of the main file just as latex does"""

        path_dict = dict(path)

        queue = []
        queue.extend(self.scan(node))
        # NOTE: de-duplication is on the bare file name only, so an entry
        # with the same name but another keyword or subdirectory is skipped.
        seen = set()

        # This is a hand-coded DSU (decorate-sort-undecorate, or
        # Schwartzian transform) pattern.  The sort key is the raw name
        # of the file as specifed on the \include, \input, etc. line.
        # TODO: what about the comment in the original Classic scanner:
        # """which lets
        # us keep the sort order constant regardless of whether the file
        # is actually found in a Repository or locally."""
        nodes = []
        source_dir = node.get_dir()
        while queue:
            include = queue.pop()
            inc_type, inc_subdir, inc_filename = include

            if inc_filename in seen:
                continue
            seen.add(inc_filename)

            n, i = self.find_include(include, source_dir, path_dict)
            if n is None:
                # Do not bother with 'usepackage' warnings, as they most
                # likely refer to system-level files
                if inc_type != 'usepackage':
                    SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
                                        "No dependency generated for file: %s (included from: %s) -- file not found" % (i, node))
            else:
                sortkey = self.sort_key(n)
                nodes.append((sortkey, n))
                # recurse down
                queue.extend(self.scan(n, inc_subdir))

        return [pair[1] for pair in sorted(nodes)]

# Local Variables:
# tab-width:4
# indent-tabs-mode:nil
# End:
# vim: set expandtab tabstop=4 shiftwidth=4:

# Source Code for Module SCons.Scanner.LaTeX