SCons.Scanner.LaTeX

1 """SCons.Scanner.LaTeX 2 3 This module implements the dependency scanner for LaTeX code. 4 5 """ 6 7 # 8 # Copyright (c) 2001 - 2014 The SCons Foundation 9 # 10 # Permission is hereby granted, free of charge, to any person obtaining 11 # a copy of this software and associated documentation files (the 12 # "Software"), to deal in the Software without restriction, including 13 # without limitation the rights to use, copy, modify, merge, publish, 14 # distribute, sublicense, and/or sell copies of the Software, and to 15 # permit persons to whom the Software is furnished to do so, subject to 16 # the following conditions: 17 # 18 # The above copyright notice and this permission notice shall be included 19 # in all copies or substantial portions of the Software. 20 # 21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 22 # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 23 # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 25 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 26 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 27 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 # 29 30 __revision__ = "src/engine/SCons/Scanner/LaTeX.py 2014/09/27 12:51:43 garyo" 31 32 import os.path 33 import re 34 35 import SCons.Scanner 36 import SCons.Util 37 38 # list of graphics file extensions for TeX and LaTeX 39 TexGraphics = ['.eps', '.ps'] 40 LatexGraphics = ['.pdf', '.png', '.jpg', '.gif', '.tif'] 41 42 # Used as a return value of modify_env_var if the variable is not set.

43 -class _Null(object):

44 pass

45 _null = _Null 46 47 # The user specifies the paths in env[variable], similar to other builders. 48 # They may be relative and must be converted to absolute, as expected 49 # by LaTeX and Co. The environment may already have some paths in 50 # env['ENV'][var]. These paths are honored, but the env[var] paths have 51 # higher precedence. All changes are un-done on exit.

def modify_env_var(env, var, abspath):
    """Prepend search directories to env['ENV'][var] and return the old value.

    abspath is prepended first; then every entry of env[var] (if that
    construction variable exists) is converted to an absolute path and
    prepended as well.  The final value is stored as a single string that
    ends with os.pathsep.

    Returns the value env['ENV'][var] had on entry, or _null when it was
    not set.
    """
    try:
        previous = env['ENV'][var]
    except KeyError:
        previous = _null

    env.PrependENVPath(var, abspath)

    try:
        configured = env[var]
        if SCons.Util.is_List(configured):
            absolute_dirs = [os.path.abspath(str(entry)) for entry in configured]
        else:
            # Split at os.pathsep to convert into absolute path
            pieces = str(configured).split(os.pathsep)
            absolute_dirs = [os.path.abspath(piece) for piece in pieces]
        env.PrependENVPath(var, absolute_dirs)
    except KeyError:
        # env[var] is not defined: nothing beyond abspath to add.
        pass

    # Convert into a string explicitly to append ":" (without which it won't
    # search system paths as well). The problem is that
    # env.AppendENVPath(var, ":") does not work, refuses to append ":"
    # (os.pathsep).
    current = env['ENV'][var]
    if SCons.Util.is_List(current):
        current = os.pathsep.join(current)
    # Append the trailing os.pathsep character here to catch the case with
    # no env[var]
    env['ENV'][var] = current + os.pathsep

    return previous

77

class FindENVPathDirs(object):
    """Callable that binds one *PATH variable name and, when invoked,
    returns all of the directories listed in env['ENV'][<that name>]."""

    def __init__(self, variable):
        # Name of the key looked up in env['ENV'] on each call.
        self.variable = variable

    def __call__(self, env, dir=None, target=None, source=None, argument=None):
        """Return a tuple of directories for the bound variable.

        An empty tuple is returned when env['ENV'] has no entry for the
        variable.  Otherwise the value is substituted through
        SCons.PathList and resolved relative to dir (or env.fs._cwd when
        dir is not given).
        """
        import SCons.PathList

        try:
            raw_path = env['ENV'][self.variable]
        except KeyError:
            return ()

        start_dir = dir or env.fs._cwd
        expanded = SCons.PathList.PathList(raw_path).subst_path(env, target, source)
        return tuple(start_dir.Rfindalldirs(expanded))

93 94 95

def LaTeXScanner():
    """Build the prototype Scanner instance used for LaTeX sources that
    are compiled with latex.
    """
    # graphics_extensions is given in search order; see the LaTeX class
    # docstring.
    return LaTeX(name="LaTeXScanner",
                 suffixes='$LATEXSUFFIXES',
                 graphics_extensions=TexGraphics,
                 recursive=0)

106

def PDFLaTeXScanner():
    """Build the prototype Scanner instance used for LaTeX sources that
    are compiled with pdflatex.
    """
    # graphics_extensions is given in search order; see the LaTeX class
    # docstring.
    return LaTeX(name="PDFLaTeXScanner",
                 suffixes='$LATEXSUFFIXES',
                 graphics_extensions=LatexGraphics,
                 recursive=0)

117

class LaTeX(SCons.Scanner.Base):
    """Class for scanning LaTeX files for included files.

    Unlike most scanners, which use regular expressions that just
    return the included file name, this returns a tuple consisting
    of the keyword for the inclusion ("include", "includegraphics",
    "input", or "bibliography"), and then the file name itself.
    Based on a quick look at LaTeX documentation, it seems that we
    should append .tex suffix for the "include" keywords, append .tex if
    there is no extension for the "input" keyword, and need to add .bib
    for the "bibliography" keyword that does not accept extensions by itself.

    Finally, if there is no extension for an "includegraphics" keyword
    latex will append .ps or .eps to find the file, while pdftex may use .pdf,
    .jpg, .tif, .mps, or .png.

    The actual subset and search order may be altered by
    DeclareGraphicsExtensions command. This complication is ignored.
    The default order corresponds to experimentation with teTeX
        $ latex --version
        pdfeTeX 3.141592-1.21a-2.2 (Web2C 7.5.4)
        kpathsea version 3.5.4
    The order is:
        ['.eps', '.ps'] for latex
        ['.png', '.pdf', '.jpg', '.tif'].

    Another difference is that the search path is determined by the type
    of the file being searched:
    env['TEXINPUTS'] for "input" and "include" keywords
    env['TEXINPUTS'] for "includegraphics" keyword
    env['TEXINPUTS'] for "lstinputlisting" keyword
    env['BIBINPUTS'] for "bibliography" keyword
    env['BSTINPUTS'] for "bibliographystyle" keyword
    env['INDEXSTYLE'] for "makeindex" keyword, no scanning support needed
                      just allows user to set it if needed.

    FIXME: also look for the class or style in document[class|style]{}
    FIXME: also look for the argument of bibliographystyle{}
    """
    # Maps each recognised keyword to the path variable searched for it.
    keyword_paths = {'include': 'TEXINPUTS',
                     'input': 'TEXINPUTS',
                     'includegraphics': 'TEXINPUTS',
                     'bibliography': 'BIBINPUTS',
                     'bibliographystyle': 'BSTINPUTS',
                     'addbibresource': 'BIBINPUTS',
                     'addglobalbib': 'BIBINPUTS',
                     'addsectionbib': 'BIBINPUTS',
                     'makeindex': 'INDEXSTYLE',
                     'usepackage': 'TEXINPUTS',
                     'lstinputlisting': 'TEXINPUTS'}
    env_variables = SCons.Util.unique(list(keyword_paths.values()))

    # Removes the "[options]" part (with optional leading whitespace) that
    # the main regex captures together with the keyword.  Compiled once
    # here, from a raw string, instead of on every scan() call.
    _noopt_cre = re.compile(r'\s*\[.*$')

    def __init__(self, name, suffixes, graphics_extensions, *args, **kw):
        """Set up the regular expressions and Scanner.Base keyword arguments.

        name                -- scanner name, passed on as kw['name']
        suffixes            -- scannable suffixes, passed on as kw['skeys']
                               and used by the scan check
        graphics_extensions -- extensions tried, in order, for an
                               "includegraphics" file name without extension

        Any 'function', 'path_function', 'recursive', 'skeys', 'scan_check'
        and 'name' entries supplied by the caller in kw are overwritten.
        """
        # We have to include \n with the % we exclude from the first part
        # part of the regex because the expression is compiled with re.M.
        # Without the \n,  the ^ could match the beginning of a *previous*
        # line followed by one or more newline characters (i.e. blank
        # lines), interfering with a match on the next line.
        # add option for whitespace before the '[options]' or the '{filename}'
        regex = r'^[^%\n]*\\(include|includegraphics(?:\s*\[[^\]]+\])?|lstinputlisting(?:\[[^\]]+\])?|input|bibliography|addbibresource|addglobalbib|addsectionbib|usepackage)\s*{([^}]*)}'
        self.cre = re.compile(regex, re.M)
        # Splits a line into (text before the first unescaped %, the rest).
        self.comment_re = re.compile(r'^((?:(?:\\%)|[^%\n])*)(.*)$', re.M)

        self.graphics_extensions = graphics_extensions

        def _scan(node, env, path=(), self=self):
            # Scanner entry point: resolve the node and scan it together
            # with everything it includes.
            node = node.rfile()
            if not node.exists():
                return []
            return self.scan_recurse(node, path)

        class FindMultiPathDirs(object):
            """The stock FindPathDirs function has the wrong granularity:
            it is called once per target, while we need the path that depends
            on what kind of included files is being searched. This wrapper
            hides multiple instances of FindPathDirs, one per the LaTeX path
            variable in the environment. When invoked, the function calculates
            and returns all the required paths as a dictionary (converted into
            a tuple to become hashable). Then the scan function converts it
            back and uses a dictionary of tuples rather than a single tuple
            of paths.
            """
            def __init__(self, dictionary):
                self.dictionary = {}
                for k, n in dictionary.items():
                    self.dictionary[k] = (SCons.Scanner.FindPathDirs(n),
                                          FindENVPathDirs(n))

            def __call__(self, env, dir=None, target=None, source=None,
                         argument=None):
                di = {}
                # Note: the dir/target/source/argument received here are
                # not forwarded; each finder is invoked with None.
                for k, (c, cENV) in self.dictionary.items():
                    di[k] = (c(env, dir=None, target=None, source=None,
                               argument=None),
                             cENV(env, dir=None, target=None, source=None,
                                  argument=None))
                # To prevent "dict is not hashable error"
                return tuple(di.items())

        class LaTeXScanCheck(object):
            """Skip all but LaTeX source files, i.e., do not scan *.eps,
            *.pdf, *.jpg, etc.
            """
            def __init__(self, suffixes):
                self.suffixes = suffixes

            def __call__(self, node, env):
                current = not node.has_builder() or node.is_up_to_date()
                scannable = node.get_suffix() in env.subst_list(self.suffixes)[0]
                # Returning false means that the file is not scanned.
                return scannable and current

        kw['function'] = _scan
        kw['path_function'] = FindMultiPathDirs(LaTeX.keyword_paths)
        kw['recursive'] = 0
        kw['skeys'] = suffixes
        kw['scan_check'] = LaTeXScanCheck(suffixes)
        kw['name'] = name

        SCons.Scanner.Base.__init__(self, *args, **kw)

    def _latex_names(self, include):
        """Return the candidate file names for one (keyword, filename) pair.

        "include" always gets '.tex' appended.  "input", "bibliography" and
        "usepackage" get '.tex', '.bib' and '.sty' respectively, but only
        when the name has no extension.  An extension-less "includegraphics"
        expands to one candidate per entry of self.graphics_extensions.
        Everything else yields the name unchanged.
        """
        keyword, filename = include[0], include[1]

        if keyword == 'include':
            return [filename + '.tex']

        if os.path.splitext(filename)[1] == "":
            default_suffix = {'input': '.tex',
                              'bibliography': '.bib',
                              'usepackage': '.sty'}.get(keyword)
            if default_suffix is not None:
                return [filename + default_suffix]
            if keyword == 'includegraphics':
                # Using self.graphics_extensions + TexGraphics here would
                # find dependencies for the PDF builder when only an .eps
                # figure is present. Since it will be found if the user
                # tells scons how to make the pdf figure, leave it out
                # for now.
                return [filename + e for e in self.graphics_extensions]

        return [filename]

    def sort_key(self, include):
        """Return the case-normalised string form of include, used to sort
        the scan results."""
        return SCons.Node.FS._my_normcase(str(include))

    def find_include(self, include, source_dir, path):
        """Locate the file named by one (keyword, filename) pair.

        include    -- (keyword, filename) tuple
        source_dir -- directory searched first for every candidate name
        path       -- dictionary mapping a keyword to a pair
                      (directories from env[var],
                       directories from env['ENV'][var])

        Returns (node, include); node is None when nothing was found.
        """
        try:
            sub_path = path[include[0]]
        except (IndexError, KeyError):
            # No search path known for this keyword: use a pair of empty
            # paths so that only source_dir is searched.  (A bare () here
            # would make the sub_path[0] lookup below raise IndexError.)
            sub_path = ((), ())

        # Initialised so the final return is well-defined even when there
        # are no candidate names at all.
        found = None
        for n in self._latex_names(include):
            # First the path in env[var], then the path in env['ENV'][var].
            for extra_dirs in (sub_path[0], sub_path[1]):
                found = SCons.Node.FS.find_file(n, (source_dir,) + extra_dirs)
                if found:
                    return found, include
        return found, include

    def canonical_text(self, text):
        """Standardize an input TeX-file contents.

        Currently:
          * removes comments, unwrapping comment-wrapped lines.
        """
        out = []
        line_continues_a_comment = False
        for line in text.splitlines():
            line, comment = self.comment_re.findall(line)[0]
            if line_continues_a_comment:
                # The previous line ended in a comment, so this line is
                # joined onto it without its leading whitespace.
                out[-1] = out[-1] + line.lstrip()
            else:
                out.append(line)
            line_continues_a_comment = len(comment) > 0
        return '\n'.join(out).rstrip() + '\n'

    def scan(self, node):
        """Return the list of (keyword, filename) tuples found in node.

        The result is cached in node.includes, so the file contents are
        only read and parsed while that attribute is None.
        """
        # Modify the default scan function to allow for the regular
        # expression to return a comma separated list of file names
        # as can be the case with the bibliography keyword.

        # Cache the includes list in node so we only scan it once:
        if node.includes is not None:
            return node.includes

        text = self.canonical_text(node.get_text_contents())
        # 1. Split comma-separated lines, e.g.
        #      ('bibliography', 'phys,comp')
        #    should become two entries
        #      ('bibliography', 'phys')
        #      ('bibliography', 'comp')
        # 2. Remove the options, e.g., such as
        #      ('includegraphics[clip,width=0.7\\linewidth]', 'picture.eps')
        #    should become
        #      ('includegraphics', 'picture.eps')
        split_includes = []
        for keyword, filenames in self.cre.findall(text):
            inc_type = self._noopt_cre.sub('', keyword)
            for filename in filenames.split(','):
                split_includes.append((inc_type, filename))

        node.includes = split_includes
        return split_includes

    def scan_recurse(self, node, path=()):
        """ do a recursive scan of the top level target file
        This lets us search for included files based on the
        directory of the main file just as latex does"""

        path_dict = dict(list(path))

        queue = []
        queue.extend(self.scan(node))
        # File names (exactly as written in the source) already handled.
        seen = set()

        # This is a hand-coded DSU (decorate-sort-undecorate, or
        # Schwartzian transform) pattern.  The sort key is the raw name
        # of the file as specifed on the \include, \input, etc. line.
        # TODO: what about the comment in the original Classic scanner:
        # """which lets
        # us keep the sort order constant regardless of whether the file
        # is actually found in a Repository or locally."""
        nodes = []
        source_dir = node.get_dir()
        while queue:
            include = queue.pop()
            if include[1] in seen:
                continue
            seen.add(include[1])

            n, i = self.find_include(include, source_dir, path_dict)
            if n is None:
                # Do not bother with 'usepackage' warnings, as they most
                # likely refer to system-level files
                if include[0] != 'usepackage':
                    SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
                                        "No dependency generated for file: %s (included from: %s) -- file not found" % (i, node))
            else:
                sortkey = self.sort_key(n)
                nodes.append((sortkey, n))
                # recurse down
                queue.extend(self.scan(n))

        # Sort on the key only, so entries with equal keys never fall
        # through to comparing the node objects themselves.
        return [pair[1] for pair in sorted(nodes, key=lambda pair: pair[0])]

385 386 # Local Variables: 387 # tab-width:4 388 # indent-tabs-mode:nil 389 # End: 390 # vim: set expandtab tabstop=4 shiftwidth=4: 391

Source Code for Module SCons.Scanner.LaTeX