SCons.Scanner.LaTeX

1 """SCons.Scanner.LaTeX 2 3 This module implements the dependency scanner for LaTeX code. 4 5 """ 6 7 # 8 # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 The SCons Foundation 9 # 10 # Permission is hereby granted, free of charge, to any person obtaining 11 # a copy of this software and associated documentation files (the 12 # "Software"), to deal in the Software without restriction, including 13 # without limitation the rights to use, copy, modify, merge, publish, 14 # distribute, sublicense, and/or sell copies of the Software, and to 15 # permit persons to whom the Software is furnished to do so, subject to 16 # the following conditions: 17 # 18 # The above copyright notice and this permission notice shall be included 19 # in all copies or substantial portions of the Software. 20 # 21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 22 # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 23 # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 25 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 26 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 27 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 # 29 30 __revision__ = "src/engine/SCons/Scanner/LaTeX.py 5023 2010/06/14 22:05:46 scons" 31 32 import os.path 33 import re 34 35 import SCons.Scanner 36 import SCons.Util 37 38 # list of graphics file extensions for TeX and LaTeX 39 TexGraphics = ['.eps', '.ps'] 40 LatexGraphics = ['.pdf', '.png', '.jpg', '.gif', '.tif'] 41 42 # Used as a return value of modify_env_var if the variable is not set.

class _Null(object):
    """Marker type meaning "the variable was not set".

    modify_env_var() hands back ``_null`` when env['ENV'] had no entry
    for the requested variable before it was modified.
    """


# The class object itself (not an instance) serves as the sentinel value.
_null = _Null

# The user specifies the paths in env[variable], similar to other builders.
# They may be relative and must be converted to absolute, as expected
# by LaTeX and Co.  The environment may already have some paths in
# env['ENV'][var].  These paths are honored, but the env[var] paths have
# higher precedence.  All changes are un-done on exit.

def modify_env_var(env, var, abspath):
    """Prepend search directories to env['ENV'][var] and return the old value.

    env     -- construction environment; must provide PrependENVPath(),
               item access, and an 'ENV' mapping.
    var     -- name of the path variable to modify (for example a
               *INPUTS variable).
    abspath -- directory that is prepended first.

    The directories listed in env[var] (a list, or a string separated by
    os.pathsep) are made absolute and prepended after ``abspath``, so they
    end up ahead of it.  The resulting value is always stored as a string
    with a trailing os.pathsep.

    Returns the value env['ENV'][var] had on entry, or ``_null`` if it
    was not set.
    """
    try:
        previous = env['ENV'][var]
    except KeyError:
        previous = _null

    env.PrependENVPath(var, abspath)

    try:
        configured = env[var]
        if SCons.Util.is_List(configured):
            absolute = [os.path.abspath(str(entry)) for entry in configured]
        else:
            # Split at os.pathsep to convert into absolute path
            absolute = [os.path.abspath(entry)
                        for entry in str(configured).split(os.pathsep)]
        env.PrependENVPath(var, absolute)
    except KeyError:
        # env[var] is not set: only abspath is added.
        pass

    # Convert into a string explicitly to append ":" (without which it won't
    # search system paths as well).  The problem is that
    # env.AppendENVPath(var, ":") does not work, refuses to append ":"
    # (os.pathsep).
    value = env['ENV'][var]
    if SCons.Util.is_List(value):
        value = os.pathsep.join(value)
    # Append the trailing os.pathsep character here to catch the case with
    # no env[var]
    env['ENV'][var] = value + os.pathsep

    return previous

77

class FindENVPathDirs(object):
    """Callable bound to one *PATH variable name.

    Calling the instance returns a tuple of all the directories found for
    the path stored in env['ENV'][variable].
    """

    def __init__(self, variable):
        # Name of the key looked up in env['ENV'] on every call.
        self.variable = variable

    def __call__(self, env, dir=None, target=None, source=None, argument=None):
        """Return the directories for env['ENV'][self.variable] as a tuple.

        An empty tuple is returned when the variable is not set.  ``dir``
        defaults to env.fs._cwd; ``argument`` is accepted but not used.
        """
        import SCons.PathList

        try:
            raw_path = env['ENV'][self.variable]
        except KeyError:
            return ()

        base_dir = dir if dir else env.fs._cwd
        expanded = SCons.PathList.PathList(raw_path).subst_path(env, target, source)
        return tuple(base_dir.Rfindalldirs(expanded))

93 94 95

def LaTeXScanner():
    """Return a prototype Scanner instance for scanning LaTeX source files
    when built with latex.
    """
    # graphics_extensions is given in the search order, see the LaTeX
    # class docstring.
    return LaTeX(
        name="LaTeXScanner",
        suffixes='$LATEXSUFFIXES',
        graphics_extensions=TexGraphics,
        recursive=0,
    )

106

def PDFLaTeXScanner():
    """Return a prototype Scanner instance for scanning LaTeX source files
    when built with pdflatex.
    """
    # graphics_extensions is given in the search order, see the LaTeX
    # class docstring.
    return LaTeX(
        name="PDFLaTeXScanner",
        suffixes='$LATEXSUFFIXES',
        graphics_extensions=LatexGraphics,
        recursive=0,
    )

117

class LaTeX(SCons.Scanner.Base):
    """Class for scanning LaTeX files for included files.

    Unlike most scanners, which use regular expressions that just
    return the included file name, this returns a tuple consisting
    of the keyword for the inclusion ("include", "includegraphics",
    "input", or "bibliography"), and then the file name itself.
    Based on a quick look at LaTeX documentation, it seems that we
    should append .tex suffix for the "include" keywords, append .tex if
    there is no extension for the "input" keyword, and need to add .bib
    for the "bibliography" keyword that does not accept extensions by itself.

    Finally, if there is no extension for an "includegraphics" keyword
    latex will append .ps or .eps to find the file, while pdftex may use .pdf,
    .jpg, .tif, .mps, or .png.

    The actual subset and search order may be altered by
    DeclareGraphicsExtensions command. This complication is ignored.
    The default order corresponds to experimentation with teTeX
        $ latex --version
        pdfeTeX 3.141592-1.21a-2.2 (Web2C 7.5.4)
        kpathsea version 3.5.4
    The order is:
        ['.eps', '.ps'] for latex
        ['.png', '.pdf', '.jpg', '.tif'] for pdflatex.

    Another difference is that the search path is determined by the type
    of the file being searched:
    env['TEXINPUTS'] for "input" and "include" keywords
    env['TEXINPUTS'] for "includegraphics" keyword
    env['TEXINPUTS'] for "lstinputlisting" keyword
    env['BIBINPUTS'] for "bibliography" keyword
    env['BSTINPUTS'] for "bibliographystyle" keyword

    FIXME: also look for the class or style in document[class|style]{}
    FIXME: also look for the argument of bibliographystyle{}
    """
    # Maps each inclusion keyword to the name of the path variable that is
    # searched for files included with that keyword.
    keyword_paths = {'include': 'TEXINPUTS',
                     'input': 'TEXINPUTS',
                     'includegraphics': 'TEXINPUTS',
                     'bibliography': 'BIBINPUTS',
                     'bibliographystyle': 'BSTINPUTS',
                     'usepackage': 'TEXINPUTS',
                     'lstinputlisting': 'TEXINPUTS'}
    # The distinct path variable names used above.
    env_variables = SCons.Util.unique(list(keyword_paths.values()))

    def __init__(self, name, suffixes, graphics_extensions, *args, **kw):
        """Set up the scanner.

        name                -- scanner name, passed on as kw['name'].
        suffixes            -- suffix specification, used both as the
                               scanner keys and by the scan check.
        graphics_extensions -- extensions tried, in order, for an
                               "includegraphics" argument without one.

        The 'function', 'path_function', 'recursive', 'skeys',
        'scan_check' and 'name' keywords are always overwritten here
        before SCons.Scanner.Base.__init__ is called.
        """
        # We have to include \n with the % we exclude from the first part
        # part of the regex because the expression is compiled with re.M.
        # Without the \n,  the ^ could match the beginning of a *previous*
        # line followed by one or more newline characters (i.e. blank
        # lines), interfering with a match on the next line.
        regex = r'^[^%\n]*\\(include|includegraphics(?:\[[^\]]+\])?|lstinputlisting(?:\[[^\]]+\])?|input|bibliography|usepackage){([^}]*)}'
        self.cre = re.compile(regex, re.M)
        self.graphics_extensions = graphics_extensions

        def _scan(node, env, path=(), self=self):
            node = node.rfile()
            if not node.exists():
                return []
            return self.scan_recurse(node, path)

        class FindMultiPathDirs(object):
            """The stock FindPathDirs function has the wrong granularity:
            it is called once per target, while we need the path that depends
            on what kind of included files is being searched. This wrapper
            hides multiple instances of FindPathDirs, one per the LaTeX path
            variable in the environment. When invoked, the function calculates
            and returns all the required paths as a dictionary (converted into
            a tuple to become hashable). Then the scan function converts it
            back and uses a dictionary of tuples rather than a single tuple
            of paths.
            """
            def __init__(self, dictionary):
                self.dictionary = {}
                for k, n in dictionary.items():
                    self.dictionary[k] = (SCons.Scanner.FindPathDirs(n),
                                          FindENVPathDirs(n))

            def __call__(self, env, dir=None, target=None, source=None,
                         argument=None):
                di = {}
                # NOTE: the dir/target/source/argument values received here
                # are not forwarded; the wrapped finders are always called
                # with None for each of them.
                for k, (c, cENV) in self.dictionary.items():
                    di[k] = (c(env, dir=None, target=None, source=None,
                               argument=None),
                             cENV(env, dir=None, target=None, source=None,
                                  argument=None))
                # To prevent "dict is not hashable error"
                return tuple(di.items())

        class LaTeXScanCheck(object):
            """Skip all but LaTeX source files, i.e., do not scan *.eps,
            *.pdf, *.jpg, etc.
            """
            def __init__(self, suffixes):
                self.suffixes = suffixes

            def __call__(self, node, env):
                current = not node.has_builder() or node.is_up_to_date()
                scannable = node.get_suffix() in env.subst_list(self.suffixes)[0]
                # Returning false means that the file is not scanned.
                return scannable and current

        kw['function'] = _scan
        kw['path_function'] = FindMultiPathDirs(LaTeX.keyword_paths)
        kw['recursive'] = 0
        kw['skeys'] = suffixes
        kw['scan_check'] = LaTeXScanCheck(suffixes)
        kw['name'] = name

        SCons.Scanner.Base.__init__(self, *args, **kw)

    def _latex_names(self, include):
        """Return the candidate file names for one (keyword, filename) pair.

        "include" always gets '.tex' appended.  When the file name has no
        extension, "input" gets '.tex', "bibliography" gets '.bib',
        "usepackage" gets '.sty', and "includegraphics" yields one
        candidate per entry of self.graphics_extensions.  In every other
        case the file name is returned unchanged, as a one-element list.
        """
        keyword, filename = include[0], include[1]
        if keyword == 'include':
            return [filename + '.tex']

        if os.path.splitext(filename)[1] == "":
            default_suffixes = {'input': '.tex',
                                'bibliography': '.bib',
                                'usepackage': '.sty'}
            if keyword in default_suffixes:
                return [filename + default_suffixes[keyword]]
            if keyword == 'includegraphics':
                # Adding TexGraphics to the list here would also find
                # dependencies for the PDF builder when only an .eps figure
                # is present.  Since that figure will be found if the user
                # tells scons how to make the pdf figure, it is left out
                # for now.
                return [filename + e for e in self.graphics_extensions]
        return [filename]

    def sort_key(self, include):
        """Return the key used to order found nodes: the normalized-case
        string form of ``include``."""
        return SCons.Node.FS._my_normcase(str(include))

    def find_include(self, include, source_dir, path):
        """Locate the file for one (keyword, filename) pair.

        include    -- (keyword, filename) tuple as produced by scan().
        source_dir -- directory of the top-level file; always searched
                      first.
        path       -- dictionary mapping a keyword to a pair of directory
                      tuples: (dirs from env[var], dirs from
                      env['ENV'][var]).

        Returns (node, include); node is None when no candidate name was
        found in any of the directories.
        """
        try:
            sub_paths = path[include[0]]
        except (IndexError, KeyError):
            # No search path known for this keyword: fall back to two
            # empty paths so that only source_dir is searched.  (A bare
            # empty tuple here made the indexing below raise IndexError.)
            sub_paths = ((), ())
        var_dirs, env_dirs = sub_paths[0], sub_paths[1]

        for name in self._latex_names(include):
            # First use the path in env[var], then the one in
            # env['ENV'][var].
            for dirs in (var_dirs, env_dirs):
                found = SCons.Node.FS.find_file(name, (source_dir,) + dirs)
                if found:
                    return found, include
        return None, include

    def scan(self, node):
        """Return the list of (keyword, filename) tuples included by node.

        The result is cached in node.includes, so each node is only
        scanned once.

        This differs from the default scan function in two ways:
        1. Comma-separated arguments are split, e.g.
               ('bibliography', 'phys,comp')
           becomes the two entries
               ('bibliography', 'phys')
               ('bibliography', 'comp')
        2. Options are removed from the keyword, e.g.
               ('includegraphics[clip,width=0.7\\linewidth]', 'picture.eps')
           becomes
               ('includegraphics', 'picture.eps')
        """
        if node.includes is not None:
            return node.includes

        includes = []
        for keyword, argument in self.cre.findall(node.get_text_contents()):
            # Everything from the first '[' on is the option block.  Cutting
            # the string there also handles options that span several lines,
            # which a '\[.*$' substitution leaves in place.
            inc_type = keyword.partition('[')[0]
            includes.extend((inc_type, name) for name in argument.split(','))

        node.includes = includes
        return includes

    def scan_recurse(self, node, path=()):
        """ do a recursive scan of the top level target file
        This lets us search for included files based on the
        directory of the main file just as latex does

        ``path`` is the tuple of (keyword, paths) items built by the path
        function; it is turned back into a dictionary here.  Returns the
        list of nodes found, ordered by sort_key().
        """
        path_dict = dict(list(path))

        queue = list(self.scan(node))
        # File names (as written in the source) that were already handled.
        seen = set()

        # This is a hand-coded DSU (decorate-sort-undecorate, or
        # Schwartzian transform) pattern.  The sort key is the raw name
        # of the file as specifed on the \include, \input, etc. line.
        # TODO: what about the comment in the original Classic scanner:
        # """which lets
        # us keep the sort order constant regardless of whether the file
        # is actually found in a Repository or locally."""
        nodes = []
        source_dir = node.get_dir()
        while queue:
            include = queue.pop()
            if include[1] in seen:
                continue
            seen.add(include[1])

            n, i = self.find_include(include, source_dir, path_dict)
            if n is None:
                # Do not bother with 'usepackage' warnings, as they most
                # likely refer to system-level files
                if include[0] != 'usepackage':
                    SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
                                        "No dependency generated for file: %s (included from: %s) -- file not found" % (i, node))
            else:
                nodes.append((self.sort_key(n), n))
                # recurse down
                queue.extend(self.scan(n))

        return [pair[1] for pair in sorted(nodes)]

357 358 # Local Variables: 359 # tab-width:4 360 # indent-tabs-mode:nil 361 # End: 362 # vim: set expandtab tabstop=4 shiftwidth=4: 363

Source Code for Module SCons.Scanner.LaTeX