Source code for SCons.cpp

# MIT License
#
# Copyright The SCons Foundation
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"""SCons C Pre-Processor module"""

import os
import re

# First "subsystem" of regular expressions that we set up:
#
# Stuff to turn the C preprocessor directives in a file's contents into
# a list of tuples that we can process easily.
#
# A table of regular expressions that fetch the arguments from the rest of
# a C preprocessor line.  Different directives have different arguments
# that we want to fetch, using the regular expressions to which the lists
# of preprocessor directives map.
cpp_lines_dict = {
    # Fetch the rest of a #if/#elif as one argument,
    # with white space optional.
    ('if', 'elif')      : r'\s*(.+)',

    # Fetch the rest of a #ifdef/#ifndef as one argument,
    # separated from the keyword by white space.
    ('ifdef', 'ifndef',): r'\s+(.+)',

    # Fetch the rest of a #import/#include/#include_next line as one
    # argument, with white space optional.
    ('import', 'include', 'include_next',)
                        : r'\s*(.+)',

    # We don't care what comes after a #else or #endif line.
    ('else', 'endif',)  : '',

    # Fetch three arguments from a #define line:
    #   1) The #defined keyword.
    #   2) The optional parentheses and arguments (if it's a function-like
    #      macro, '' if it's not).
    #   3) The expansion value.
    ('define',)         : r'\s+([_A-Za-z][_A-Za-z0-9_]*)(\([^)]*\))?\s*(.*)',

    # Fetch the #undefed keyword from a #undef line.
    ('undef',)          : r'\s+([_A-Za-z][A-Za-z0-9_]*)',
}

# Create a table that maps each individual C preprocessor directive to
# the corresponding compiled regular expression that fetches the arguments
# we care about.
Table = {}
for op_list, expr in cpp_lines_dict.items():
    e = re.compile(expr)
    for op in op_list:
        Table[op] = e
del e
del op
del op_list

# Create a list of the expressions we'll use to match all of the
# preprocessor directives.  These are the same as the directives
# themselves *except* that we must use a negative lookahead assertion
# when matching "if" so it doesn't match the "if" in "ifdef" or "ifndef".
override = {
    'if'                        : 'if(?!n?def)',
}
l = [override.get(x, x) for x in Table.keys()]


# Turn the list of expressions into one big honkin' regular expression
# that will match all the preprocessor lines at once.  This will return
# a list of tuples, one for each preprocessor line.  The preprocessor
# directive will be the first element in each tuple, and the rest of
# the line will be the second element.
e = r'^\s*#\s*(' + '|'.join(l) + ')(.*)$'

# And last but not least, compile the expression.
CPP_Expression = re.compile(e, re.M)

# A list with RE to cleanup CPP Expressions (tuples)
# We should remove all comments and carriage returns (\r) before evaluating
CPP_Expression_Cleaner_List = [
    r"/\*.*\*/",
    r"/\*.*",
    r"//.*",
    r"\r"
]
CPP_Expression_Cleaner_RE = re.compile(
    r"\s*(" + "|".join(CPP_Expression_Cleaner_List) + ")")

[docs]def Cleanup_CPP_Expressions(ts):
    return [(t[0], CPP_Expression_Cleaner_RE.sub("", t[1])) for t in ts]

#
# Second "subsystem" of regular expressions that we set up:
#
# Stuff to translate a C preprocessor expression (as found on a #if or
# #elif line) into an equivalent Python expression that we can eval().
#

# A dictionary that maps the C representation of Boolean operators
# to their Python equivalents.
CPP_to_Python_Ops_Dict = {
    '!'         : ' not ',
    '!='        : ' != ',
    '&&'        : ' and ',
    '||'        : ' or ',
    '?'         : ' and ',
    ':'         : ' or ',
}

CPP_to_Python_Ops_Sub = lambda m: CPP_to_Python_Ops_Dict[m.group(0)]

# We have to sort the keys by length so that longer expressions
# come *before* shorter expressions--in particular, "!=" must
# come before "!" in the alternation.  Without this, the Python
# re module, as late as version 2.2.2, empirically matches the
# "!" in "!=" first, instead of finding the longest match.
# What's up with that?
l = sorted(list(CPP_to_Python_Ops_Dict.keys()), key=lambda a: len(a), reverse=True)

# Turn the list of keys into one regular expression that will allow us
# to substitute all of the operators at once.
expr = '|'.join(map(re.escape, l))

# ...and compile the expression.
CPP_to_Python_Ops_Expression = re.compile(expr)

# A separate list of expressions to be evaluated and substituted
# sequentially, not all at once.
CPP_to_Python_Eval_List = [
    [r'defined\s+(\w+)',                 '"\\1" in __dict__'],
    [r'defined\s*\((\w+)\)',             '"\\1" in __dict__'],
    [r'(0x[0-9A-Fa-f]+)(?:L|UL)?',  '\\1'],
    [r'(\d+)(?:L|UL)?',  '\\1'],
]

# Replace the string representations of the regular expressions in the
# list with compiled versions.
for l in CPP_to_Python_Eval_List:
    l[0] = re.compile(l[0])

# Wrap up all of the above into a handy function.
[docs]def CPP_to_Python(s):
    """
    Converts a C pre-processor expression into an equivalent
    Python expression that can be evaluated.
    """
    s = CPP_to_Python_Ops_Expression.sub(CPP_to_Python_Ops_Sub, s)
    for expr, repl in CPP_to_Python_Eval_List:
        s = re.sub(expr, repl, s)
    return s



del expr
del l
del override



[docs]class FunctionEvaluator:
    """Handles delayed evaluation of a #define function call."""

    def __init__(self, name, args, expansion) -> None:
        """
        Squirrels away the arguments and expansion value of a #define
        macro function for later evaluation when we must actually expand
        a value that uses it.
        """
        self.name = name
        self.args = function_arg_separator.split(args)
        try:
            expansion = expansion.split('##')
        except AttributeError:
            pass
        self.expansion = expansion

[docs]    def __call__(self, *values):
        """
        Evaluates the expansion of a #define macro function called
        with the specified values.
        """
        if len(self.args) != len(values):
            raise ValueError("Incorrect number of arguments to `%s'" % self.name)
        # Create a dictionary that maps the macro arguments to the
        # corresponding values in this "call."  We'll use this when we
        # eval() the expansion so that arguments will get expanded to
        # the right values.
        args = self.args
        localvars = {k: v for k, v in zip(args, values)}
        parts = [s if s in args else repr(s) for s in self.expansion]
        statement = ' + '.join(parts)

        return eval(statement, globals(), localvars)


# Find line continuations.
line_continuations = re.compile('\\\\\r?\n')

# Search for a "function call" macro on an expansion.  Returns the
# two-tuple of the "function" name itself, and a string containing the
# arguments within the call parentheses.
function_name = re.compile(r'(\S+)\(([^)]*)\)')

# Split a string containing comma-separated function call arguments into
# the separate arguments.
function_arg_separator = re.compile(r',\s*')



[docs]class PreProcessor:
    """The main workhorse class for handling C pre-processing."""

    def __init__(self, current=os.curdir, cpppath=(), dict={}, all: int=0, depth=-1) -> None:
        global Table

        cpppath = tuple(cpppath)

        self.searchpath = {
            '"': (current,) + cpppath,
            '<': cpppath + (current,),
        }

        # Initialize our C preprocessor namespace for tracking the
        # values of #defined keywords.  We use this namespace to look
        # for keywords on #ifdef/#ifndef lines, and to eval() the
        # expressions on #if/#elif lines (after massaging them from C to
        # Python).
        self.cpp_namespace = dict.copy()
        self.cpp_namespace['__dict__'] = self.cpp_namespace

        # Return all includes without resolving
        if all:
            self.do_include = self.all_include

        # Max depth of nested includes:
        # -1 = unlimited
        # 0 - disabled nesting
        # >0 - number of allowed nested includes
        self.depth = depth

        # For efficiency, a dispatch table maps each C preprocessor
        # directive (#if, #define, etc.) to the method that should be
        # called when we see it.  We accomodate state changes (#if,
        # #ifdef, #ifndef) by pushing the current dispatch table on a
        # stack and changing what method gets called for each relevant
        # directive we might see next at this level (#else, #elif).
        # #endif will simply pop the stack.
        d = {'scons_current_file': self.scons_current_file}
        for op in Table.keys():
            d[op] = getattr(self, 'do_' + op)
        self.default_table = d

[docs]    def __call__(self, file):
        """
        Pre-processes a file.

        This is the main public entry point.
        """
        self.current_file = file
        return self.process_file(file)

[docs]    def process_file(self, file):
        """
        Pre-processes a file.

        This is the main internal entry point.
        """
        return self._process_tuples(self.tupleize(self.read_file(file)), file)

[docs]    def process_contents(self, contents):
        """
        Pre-processes a file contents.

        Is used by tests
        """
        return self._process_tuples(self.tupleize(contents))

[docs]    def _process_tuples(self, tuples, file=None):
        self.stack = []
        self.dispatch_table = self.default_table.copy()
        self.current_file = file
        self.tuples = tuples

        self.initialize_result(file)
        while self.tuples:
            t = self.tuples.pop(0)
            # Uncomment to see the list of tuples being processed (e.g.,
            # to validate the CPP lines are being translated correctly).
            # print(t)
            self.dispatch_table[t[0]](t)
        return self.finalize_result(file)

[docs]    def tupleize(self, contents):
        """
        Turns the contents of a file into a list of easily-processed
        tuples describing the CPP lines in the file.

        The first element of each tuple is the line's preprocessor
        directive (#if, #include, #define, etc., minus the initial '#').
        The remaining elements are specific to the type of directive, as
        pulled apart by the regular expression.
        """
        return self._match_tuples(self._parse_tuples(contents))

[docs]    def _parse_tuples(self, contents):
        global CPP_Expression
        contents = line_continuations.sub('', contents)
        tuples = CPP_Expression.findall(contents)
        return Cleanup_CPP_Expressions(tuples)

[docs]    def _match_tuples(self, tuples):
        global Table
        result = []
        for t in tuples:
            m = Table[t[0]].match(t[1])
            if m:
                result.append((t[0],) + m.groups())
        return result

    # Dispatch table stack manipulation methods.

[docs]    def save(self) -> None:
        """
        Pushes the current dispatch table on the stack and re-initializes
        the current dispatch table to the default.
        """
        self.stack.append(self.dispatch_table)
        self.dispatch_table = self.default_table.copy()

[docs]    def restore(self) -> None:
        """
        Pops the previous dispatch table off the stack and makes it the
        current one.
        """
        try: self.dispatch_table = self.stack.pop()
        except IndexError: pass

    # Utility methods.

[docs]    def do_nothing(self, t) -> None:
        """
        Null method for when we explicitly want the action for a
        specific preprocessor directive to do nothing.
        """
        pass

[docs]    def scons_current_file(self, t) -> None:
        self.current_file = t[1]

[docs]    def eval_expression(self, t):
        """
        Evaluates a C preprocessor expression.

        This is done by converting it to a Python equivalent and
        eval()ing it in the C preprocessor namespace we use to
        track #define values.
        """
        t = CPP_to_Python(' '.join(t[1:]))
        try:
            return eval(t, self.cpp_namespace)
        except (NameError, TypeError, SyntaxError):
            return 0

[docs]    def initialize_result(self, fname) -> None:
        self.result = [fname]

[docs]    def finalize_result(self, fname):
        return self.result[1:]

[docs]    def find_include_file(self, t):
        """
        Finds the #include file for a given preprocessor tuple.
        """
        fname = t[2]
        for d in self.searchpath[t[1]]:
            if d == os.curdir:
                f = fname
            else:
                f = os.path.join(d, fname)
            if os.path.isfile(f):
                return f
        return None

[docs]    def read_file(self, file):
        with open(file) as f:
            return f.read()

    # Start and stop processing include lines.

[docs]    def start_handling_includes(self, t=None) -> None:
        """
        Causes the PreProcessor object to start processing #import,
        #include and #include_next lines.

        This method will be called when a #if, #ifdef, #ifndef or #elif
        evaluates True, or when we reach the #else in a #if, #ifdef,
        #ifndef or #elif block where a condition already evaluated
        False.

        """
        d = self.dispatch_table
        p = self.stack[-1] if self.stack else self.default_table

        for k in ('import', 'include', 'include_next', 'define', 'undef'):
            d[k] = p[k]

[docs]    def stop_handling_includes(self, t=None) -> None:
        """
        Causes the PreProcessor object to stop processing #import,
        #include and #include_next lines.

        This method will be called when a #if, #ifdef, #ifndef or #elif
        evaluates False, or when we reach the #else in a #if, #ifdef,
        #ifndef or #elif block where a condition already evaluated True.
        """
        d = self.dispatch_table
        d['import'] = self.do_nothing
        d['include'] =  self.do_nothing
        d['include_next'] =  self.do_nothing
        d['define'] =  self.do_nothing
        d['undef'] =  self.do_nothing

    # Default methods for handling all of the preprocessor directives.
    # (Note that what actually gets called for a given directive at any
    # point in time is really controlled by the dispatch_table.)

[docs]    def _do_if_else_condition(self, condition) -> None:
        """
        Common logic for evaluating the conditions on #if, #ifdef and
        #ifndef lines.
        """
        self.save()
        d = self.dispatch_table
        if condition:
            self.start_handling_includes()
            d['elif'] = self.stop_handling_includes
            d['else'] = self.stop_handling_includes
        else:
            self.stop_handling_includes()
            d['elif'] = self.do_elif
            d['else'] = self.start_handling_includes

[docs]    def do_ifdef(self, t) -> None:
        """
        Default handling of a #ifdef line.
        """
        self._do_if_else_condition(t[1] in self.cpp_namespace)

[docs]    def do_ifndef(self, t) -> None:
        """
        Default handling of a #ifndef line.
        """
        self._do_if_else_condition(t[1] not in self.cpp_namespace)

[docs]    def do_if(self, t) -> None:
        """
        Default handling of a #if line.
        """
        self._do_if_else_condition(self.eval_expression(t))

[docs]    def do_elif(self, t) -> None:
        """
        Default handling of a #elif line.
        """
        d = self.dispatch_table
        if self.eval_expression(t):
            self.start_handling_includes()
            d['elif'] = self.stop_handling_includes
            d['else'] = self.stop_handling_includes

[docs]    def do_else(self, t) -> None:
        """
        Default handling of a #else line.
        """
        pass

[docs]    def do_endif(self, t) -> None:
        """
        Default handling of a #endif line.
        """
        self.restore()

[docs]    def do_define(self, t) -> None:
        """
        Default handling of a #define line.
        """
        _, name, args, expansion = t
        try:
            expansion = int(expansion)
        except (TypeError, ValueError):
            # handle "defined" chain "! (defined (A) || defined (B)" ...
            if "defined " in expansion:
                self.cpp_namespace[name] = self.eval_expression(t[2:])
                return

        if args:
            evaluator = FunctionEvaluator(name, args[1:-1], expansion)
            self.cpp_namespace[name] = evaluator
        else:
            self.cpp_namespace[name] = expansion

[docs]    def do_undef(self, t) -> None:
        """
        Default handling of a #undef line.
        """
        try: del self.cpp_namespace[t[1]]
        except KeyError: pass

[docs]    def do_import(self, t) -> None:
        """
        Default handling of a #import line.
        """
        # XXX finish this -- maybe borrow/share logic from do_include()...?
        pass

[docs]    def do_include(self, t) -> None:
        """
        Default handling of a #include line.
        """
        t = self.resolve_include(t)
        if not t:
            return
        include_file = self.find_include_file(t)
        # avoid infinite recursion
        if not include_file or include_file in self.result:
            return
        self.result.append(include_file)
        # print include_file, len(self.tuples)

        # Handle maximum depth of nested includes
        if self.depth != -1:
            current_depth = 0
            for t in self.tuples:
                if t[0] == "scons_current_file":
                    current_depth += 1
            if current_depth >= self.depth:
                return

        new_tuples = [('scons_current_file', include_file)] + \
                      self.tupleize(self.read_file(include_file)) + \
                     [('scons_current_file', self.current_file)]
        self.tuples[:] = new_tuples + self.tuples

    # From: Stefan Seefeld <seefeld@sympatico.ca> (22 Nov 2005)
    #
    # By the way, #include_next is not the same as #include. The difference
    # being that #include_next starts its search in the path following the
    # path that let to the including file. In other words, if your system
    # include paths are ['/foo', '/bar'], and you are looking at a header
    # '/foo/baz.h', it might issue an '#include_next <baz.h>' which would
    # correctly resolve to '/bar/baz.h' (if that exists), but *not* see
    # '/foo/baz.h' again. See
    # https://gcc.gnu.org/onlinedocs/cpp/Wrapper-Headers.html for more notes.
    #
    # I have no idea in what context #import might be used.
    # Update: possibly these notes?
    # https://github.com/MicrosoftDocs/cpp-docs/blob/main/docs/preprocessor/hash-import-directive-cpp.md

    # XXX is #include_next really the same as #include ?
    do_include_next = do_include

    # Utility methods for handling resolution of include files.

[docs]    def resolve_include(self, t):
        """Resolve a tuple-ized #include line.

        This handles recursive expansion of values without "" or <>
        surrounding the name until an initial " or < is found, to handle
        #include FILE where FILE is a #define somewhere else.
        """
        s = t[1].strip()
        while not s[0] in '<"':
            try:
                s = self.cpp_namespace[s]
                # strip backslashes from the computed include (-DFOO_H=\"foo.h\")
                for c in '<">':
                    s = s.replace(f"\\{c}", c)
            except KeyError:
                m = function_name.search(s)

                # Date: Mon, 28 Nov 2016 17:47:13 UTC
                # From: Ivan Kravets <ikravets@platformio.org>
                #
                # Ignore `#include` directive that depends on dynamic macro
                # which is not located in state TABLE
                # For example, `#include MYCONFIG_FILE`
                if not m:
                    return None

                s = self.cpp_namespace[m.group(1)]
                if callable(s):
                    args = function_arg_separator.split(m.group(2))
                    s = s(*args)
            if not s:
                return None
        return (t[0], s[0], s[1:-1])

[docs]    def all_include(self, t) -> None:
        """
        """
        self.result.append(self.resolve_include(t))


[docs]class DumbPreProcessor(PreProcessor):
    """A preprocessor that ignores all #if/#elif/#else/#endif directives
    and just reports back *all* of the #include files (like the classic
    SCons scanner did).

    This is functionally equivalent to using a regular expression to
    find all of the #include lines, only slower.  It exists mainly as
    an example of how the main PreProcessor class can be sub-classed
    to tailor its behavior.
    """
    def __init__(self, *args, **kw) -> None:
        PreProcessor.__init__(self, *args, **kw)
        d = self.default_table
        for func in ['if', 'elif', 'else', 'endif', 'ifdef', 'ifndef']:
            d[func] = d[func] = self.do_nothing

# Local Variables:
# tab-width:4
# indent-tabs-mode:nil
# End:
# vim: set expandtab tabstop=4 shiftwidth=4: