antkeeper
/
superbuild

#!/usr/bin/env python
# coding=utf-8
# amalgamate.py - Amalgamate C source and header files.
# Copyright (c) 2012, Erik Edlund <erik.edlund@32767.se>
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
#  * Redistributions of source code must retain the above copyright notice,
#  this list of conditions and the following disclaimer.
#
#  * Redistributions in binary form must reproduce the above copyright notice,
#  this list of conditions and the following disclaimer in the documentation
#  and/or other materials provided with the distribution.
#
#  * Neither the name of Erik Edlund, nor the names of its contributors may
#  be used to endorse or promote products derived from this software without
#  specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from __future__ import divisionfrom __future__ import print_functionfrom __future__ import unicode_literals
import argparseimport datetimeimport jsonimport osimport re

class Amalgamation(object):
    """Builds one amalgamated file from the sources named in a JSON config.

    Every top-level key of the JSON config is copied onto the instance as an
    attribute. The methods of this class read ``target`` (output file path),
    ``sources`` (files to process, in order) and ``include_paths``
    (directories searched for included files).
    """

    def actual_path(self, file_path):
        """Return ``file_path``, joined onto ``self.source_path`` if relative."""
        if not os.path.isabs(file_path):
            file_path = os.path.join(self.source_path, file_path)
        return file_path

    def find_included_file(self, file_path, source_dir):
        """Locate an included file.

        ``source_dir`` (when truthy) is searched first, followed by every
        entry of ``self.include_paths``. Returns the first candidate path
        (search directory joined with ``file_path``) that exists as a file
        once resolved through ``actual_path``, or ``None`` if none does.
        """
        # Copy so that inserting source_dir never mutates the configured list.
        search_dirs = self.include_paths[:]
        if source_dir:
            search_dirs.insert(0, source_dir)

        for search_dir in search_dirs:
            search_path = os.path.join(search_dir, file_path)
            if os.path.isfile(self.actual_path(search_path)):
                return search_path
        return None

    def __init__(self, args):
        """Load the JSON config named by ``args.config`` and store options.

        ``args`` must provide ``config``, ``verbose``, ``prologue`` and
        ``source_path`` attributes (as produced by the argument parser).
        """
        # find_included_file() always reads include_paths, so a config that
        # omits the key must still leave a usable (empty) search list.
        self.include_paths = []

        # Only the parsing needs the file; close it before doing anything else.
        with open(args.config, 'r') as f:
            config = json.load(f)
        for key in config:
            setattr(self, key, config[key])

        # Command-line options are applied last and therefore win over
        # identically named config keys.
        self.verbose = args.verbose == "yes"
        self.prologue = args.prologue
        self.source_path = args.source_path
        self.included_files = []

    def generate(self):
        """Generate the amalgamation and write it to ``self.target``."""
        # Collect the pieces and join once instead of growing a string.
        parts = []

        if self.prologue:
            with open(self.prologue, 'r') as f:
                # The prologue is used as a strftime format string, so date
                # directives in it are expanded with the current time.
                parts.append(datetime.datetime.now().strftime(f.read()))

        if self.verbose:
            print("Config:")
            print(" target        = {0}".format(self.target))
            print(" working_dir   = {0}".format(os.getcwd()))
            print(" include_paths = {0}".format(self.include_paths))
        print("Creating amalgamation:")
        for file_path in self.sources:
            # Do not check the include paths while processing the source
            # list, all given source paths must be correct.
            print(" - processing \"{0}\"".format(file_path))
            t = TranslationUnit(file_path, self, True)
            parts.append(t.content)

        with open(self.target, 'w') as f:
            f.write("".join(parts))

        print("...done!\n")
        if self.verbose:
            print("Files processed: {0}".format(self.sources))
            print("Files included: {0}".format(self.included_files))
        print("")

def _is_within(match, matches):    for m in matches:        if match.start() > m.start() and \                match.end() < m.end():            return True    return False

class TranslationUnit(object):
    """One source or header file with its resolvable includes inlined.

    Constructing an instance registers ``file_path`` in
    ``amalgamation.included_files``, reads the file and rewrites
    ``self.content``: every ``#include`` that the amalgamation can resolve is
    commented out and, unless that file was already included, followed by the
    processed content of the included file. For non-root units, ``#pragma
    once`` directives are removed. Directives located inside comments or
    string literals are left alone.
    """

    # // C++ comment.
    cpp_comment_pattern = re.compile(r"//.*?\n")

    # /* C comment. */
    c_comment_pattern = re.compile(r"/\*.*?\*/", re.S)

    # "complex \"stri\\\ng\" value".
    string_pattern = re.compile("[^']" r'".*?(?<=[^\\])"', re.S)

    # Handle simple include directives. Support for advanced
    # directives where macros and defines needs to expanded is
    # not a concern right now.
    include_pattern = re.compile(
        r'#\s*include\s+(<|")(?P<path>.*?)("|>)', re.S)

    # #pragma once
    pragma_once_pattern = re.compile(r'#\s*pragma\s+once', re.S)

    def _search_content(self, index, pattern, contexts):
        """Try to match ``pattern`` exactly at ``index`` in ``self.content``.

        On success the match is appended to ``contexts`` and the index just
        past the match is returned; otherwise ``index + 2`` is returned so the
        caller resumes one character after the delimiter it detected.
        """
        # Anchor at index (match, not search): if the construct starting here
        # cannot be matched, e.g. the quote of the char literal '"', an
        # unanchored search would grab some later construct and jump the scan
        # past everything in between, hiding comments located there.
        match = pattern.match(self.content, index)
        if match:
            contexts.append(match)
            return match.end()
        return index + 2

    def _find_skippable_contexts(self):
        """Return matches for the comments and strings in ``self.content``.

        These are the contexts in which a found directive should not be
        processed. The list is in order of appearance.
        """
        skippable_contexts = []

        # Walk through the content char by char, and try to grab
        # skippable contexts using regular expressions when found.
        # Starting at 1 because every test looks at the previous char too.
        i = 1
        content_len = len(self.content)
        while i < content_len:
            j = i - 1
            current = self.content[i]
            previous = self.content[j]

            if current == '"':
                # String value (the pattern consumes the char before the
                # quote, hence the match is attempted from j).
                i = self._search_content(j, self.string_pattern,
                                         skippable_contexts)
            elif current == '*' and previous == '/':
                # C style comment.
                i = self._search_content(j, self.c_comment_pattern,
                                         skippable_contexts)
            elif current == '/' and previous == '/':
                # C++ style comment.
                i = self._search_content(j, self.cpp_comment_pattern,
                                         skippable_contexts)
            else:
                # Skip to the next char.
                i += 1

        return skippable_contexts

    def _find_active_matches(self, pattern):
        """Return matches of ``pattern`` that are not inside a skippable context."""
        skippable_contexts = self._find_skippable_contexts()
        return [m for m in pattern.finditer(self.content)
                if not _is_within(m, skippable_contexts)]

    def _process_pragma_once(self):
        """Remove ``#pragma once`` directives from ``self.content``.

        Returns the number of directives removed.
        """
        # Too short to hold a directive at all.
        if len(self.content) < len("#pragma once"):
            return 0

        pragmas = self._find_active_matches(self.pragma_once_pattern)

        # Keep everything between the directives, drop the directives.
        pieces = []
        prev_end = 0
        for pragma_match in pragmas:
            pieces.append(self.content[prev_end:pragma_match.start()])
            prev_end = pragma_match.end()
        pieces.append(self.content[prev_end:])
        self.content = "".join(pieces)

        return len(pragmas)

    def _process_includes(self):
        """Inline all trivial ``#include`` directives into ``self.content``.

        Returns the number of include directives that were resolved.
        """
        # Too short to hold a directive at all.
        if len(self.content) < len("#include <x>"):
            return 0

        # Collect the directives which the amalgamation is able to resolve;
        # unresolved ones (e.g. system headers) stay in the output untouched.
        includes = []
        for include_match in self._find_active_matches(self.include_pattern):
            include_path = include_match.group("path")
            # Only quoted includes are looked up next to the including file.
            search_same_dir = include_match.group(1) == '"'
            found_included_path = self.amalgamation.find_included_file(
                include_path, self.file_dir if search_same_dir else None)
            if found_included_path:
                includes.append((include_match, found_included_path))

        # Replace each collected directive with a comment and, the first
        # time a file is seen, the processed content of that file.
        pieces = []
        prev_end = 0
        for include_match, found_included_path in includes:
            pieces.append(self.content[prev_end:include_match.start()])
            pieces.append("// {0}\n".format(include_match.group(0)))
            if found_included_path not in self.amalgamation.included_files:
                t = TranslationUnit(found_included_path, self.amalgamation,
                                    False)
                pieces.append(t.content)
            prev_end = include_match.end()
        pieces.append(self.content[prev_end:])
        self.content = "".join(pieces)

        return len(includes)

    def _process(self):
        """Run all content processing steps."""
        if not self.is_root:
            self._process_pragma_once()
        self._process_includes()

    def __init__(self, file_path, amalgamation, is_root):
        """Read and process ``file_path``.

        ``amalgamation`` supplies ``actual_path``, ``find_included_file`` and
        the shared ``included_files`` list. ``is_root`` is True for files
        taken from the source list and False for included files.

        Raises IOError if the resolved path is not an existing file.
        """
        self.file_path = file_path
        self.file_dir = os.path.dirname(file_path)
        self.amalgamation = amalgamation
        self.is_root = is_root

        # Registered before processing so that nested units which include
        # this file again do not inline it a second time.
        self.amalgamation.included_files.append(self.file_path)

        actual_path = self.amalgamation.actual_path(file_path)
        if not os.path.isfile(actual_path):
            raise IOError("File not found: \"{0}\"".format(file_path))
        with open(actual_path, 'r') as f:
            self.content = f.read()
        # Process only after the file is closed: processing recursively opens
        # included files, and the handle of every file in the include chain
        # would otherwise stay open at the same time.
        self._process()

def main():
    """Parse the command line, then build and write the amalgamation."""
    parser = argparse.ArgumentParser(
        description="Amalgamate C source and header files.",
        usage=" ".join((
            "amalgamate.py",
            "[-v]",
            "-c path/to/config.json",
            "-s path/to/source/dir",
            "[-p path/to/prologue.(c|h)]",
        )))

    # (flags, keyword settings) for every supported option, in the order
    # they are registered with the parser.
    option_table = (
        (("-v", "--verbose"),
         dict(dest="verbose", choices=["yes", "no"], metavar="",
              help="be verbose")),
        (("-c", "--config"),
         dict(dest="config", required=True, metavar="",
              help="path to a JSON config file")),
        (("-s", "--source"),
         dict(dest="source_path", required=True, metavar="",
              help="source code path")),
        (("-p", "--prologue"),
         dict(dest="prologue", required=False, metavar="",
              help="path to a C prologue file")),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    parsed_args = parser.parse_args()
    Amalgamation(parsed_args).generate()

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":    main()