|
|
- #!/usr/bin/env python
- # coding=utf-8
-
- # amalgamate.py - Amalgamate C source and header files.
- # Copyright (c) 2012, Erik Edlund <erik.edlund@32767.se>
- #
- # Redistribution and use in source and binary forms, with or without modification,
- # are permitted provided that the following conditions are met:
- #
- # * Redistributions of source code must retain the above copyright notice,
- # this list of conditions and the following disclaimer.
- #
- # * Redistributions in binary form must reproduce the above copyright notice,
- # this list of conditions and the following disclaimer in the documentation
- # and/or other materials provided with the distribution.
- #
- # * Neither the name of Erik Edlund, nor the names of its contributors may
- # be used to endorse or promote products derived from this software without
- # specific prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- from __future__ import division
- from __future__ import print_function
- from __future__ import unicode_literals
-
- import argparse
- import datetime
- import json
- import os
- import re
-
-
class Amalgamation(object):
    """Driver that concatenates the configured sources into one target file.

    Settings come from a JSON config file: every top-level key of that file
    is copied onto the instance as an attribute. The methods below read the
    ``target``, ``sources`` and ``include_paths`` attributes, so the config
    is expected to provide them.
    """

    def __init__(self, args):
        """Load the JSON config and remember the command-line options.

        ``args`` must expose ``config``, ``verbose``, ``prologue`` and
        ``source_path`` attributes (as the argparse namespace built by the
        script's command-line parser does).
        """
        with open(args.config, 'r') as config_file:
            settings = json.loads(config_file.read())
        for name in settings:
            setattr(self, name, settings[name])

        # Command-line values are assigned last so they win over config keys
        # of the same name.
        self.verbose = args.verbose == "yes"
        self.prologue = args.prologue
        self.source_path = args.source_path
        # Paths of every file pulled into the amalgamation so far.
        self.included_files = []

    def actual_path(self, file_path):
        """Return file_path, prefixed with self.source_path unless absolute."""
        if os.path.isabs(file_path):
            return file_path
        return os.path.join(self.source_path, file_path)

    def find_included_file(self, file_path, source_dir):
        """Locate an included file and return its path, or None.

        ``source_dir`` (when truthy) is tried first, followed by each entry
        of ``self.include_paths`` in order. The returned path is the joined
        search directory and ``file_path``; it is not run through
        ``actual_path``.
        """
        candidates = list(self.include_paths)
        if source_dir:
            candidates = [source_dir] + candidates

        for directory in candidates:
            candidate = os.path.join(directory, file_path)
            if os.path.isfile(self.actual_path(candidate)):
                return candidate
        return None

    def generate(self):
        """Build the amalgamation and write it to ``self.target``."""
        pieces = []

        if self.prologue:
            # The prologue text is used as a strftime format, so date
            # directives inside it are expanded with the current time.
            with open(self.prologue, 'r') as prologue_file:
                pieces.append(
                    datetime.datetime.now().strftime(prologue_file.read()))

        if self.verbose:
            print("Config:")
            print(" target = {0}".format(self.target))
            print(" working_dir = {0}".format(os.getcwd()))
            print(" include_paths = {0}".format(self.include_paths))
        print("Creating amalgamation:")
        for source in self.sources:
            # The include paths are not consulted for the source list; every
            # listed source path must be correct as given.
            print(" - processing \"{0}\"".format(source))
            unit = TranslationUnit(source, self, True)
            pieces.append(unit.content)

        with open(self.target, 'w') as target_file:
            target_file.write("".join(pieces))

        print("...done!\n")
        if self.verbose:
            print("Files processed: {0}".format(self.sources))
            print("Files included: {0}".format(self.included_files))
        print("")
-
-
- def _is_within(match, matches):
- for m in matches:
- if match.start() > m.start() and \
- match.end() < m.end():
- return True
- return False
-
-
class TranslationUnit(object):
    """A single source/header file with its resolvable includes expanded.

    Constructing an instance reads the file, strips ``#pragma once`` from
    non-root files and replaces every include directive that the owning
    amalgamation can resolve with the (recursively processed) content of
    the included file. The result is available as ``self.content``.
    """

    # // C++ comment.
    cpp_comment_pattern = re.compile(r"//.*?\n")

    # /* C comment. */
    c_comment_pattern = re.compile(r"/\*.*?\*/", re.S)

    # "complex \"stri\\\ng\" value".
    # The leading [^'] consumes the character before the opening quote so
    # that a quote inside a char literal ('"') does not start a string.
    string_pattern = re.compile("[^']" r'".*?(?<=[^\\])"', re.S)

    # Handle simple include directives. Support for advanced
    # directives where macros and defines need to be expanded is
    # not a concern right now.
    include_pattern = re.compile(
        r'#\s*include\s+(<|")(?P<path>.*?)("|>)', re.S)

    # #pragma once
    pragma_once_pattern = re.compile(r'#\s*pragma\s+once', re.S)

    def _search_content(self, index, pattern, contexts):
        """Try to match ``pattern`` in self.content exactly at ``index``.

        On success the match is appended to ``contexts`` and the position
        just past it is returned. Otherwise ``index + 2`` is returned, which
        for the callers (they pass the position of the previous character)
        is the character following the one that triggered the lookup.

        The match is anchored at ``index`` on purpose: an unanchored search
        could latch onto a later, unrelated construct and make the scanner
        jump over everything in between, leaving comments there unrecorded.
        """
        match = pattern.match(self.content, index)
        if match:
            contexts.append(match)
            return match.end()
        return index + 2

    def _find_skippable_contexts(self):
        """Return matches for the comments and strings found in the content.

        Directives located inside one of these contexts must not be
        processed. The content is walked character by character, looking at
        the current and the previous character; when they look like the
        start of a string or comment the corresponding pattern is tried at
        the previous character's position.
        """
        skippable_contexts = []

        # Start at 1 because every step inspects a two-character window.
        i = 1
        content_len = len(self.content)
        while i < content_len:
            j = i - 1
            current = self.content[i]
            previous = self.content[j]

            if current == '"':
                # String value.
                i = self._search_content(j, self.string_pattern,
                                         skippable_contexts)
            elif current == '*' and previous == '/':
                # C style comment.
                i = self._search_content(j, self.c_comment_pattern,
                                         skippable_contexts)
            elif current == '/' and previous == '/':
                # C++ style comment.
                i = self._search_content(j, self.cpp_comment_pattern,
                                         skippable_contexts)
            else:
                # Skip to the next char.
                i += 1

        return skippable_contexts

    def _process_pragma_once(self):
        """Remove ``#pragma once`` directives from self.content.

        Directives inside comments or strings are left alone. Returns the
        number of directives removed.
        """
        # Content shorter than the shortest possible directive cannot
        # contain one.
        if len(self.content) < len("#pragma once"):
            return 0

        skippable_contexts = self._find_skippable_contexts()
        pragmas = [
            pragma_match
            for pragma_match in self.pragma_once_pattern.finditer(self.content)
            if not _is_within(pragma_match, skippable_contexts)
        ]

        # Rebuild the content from the text between the directives.
        parts = []
        prev_end = 0
        for pragma_match in pragmas:
            parts.append(self.content[prev_end:pragma_match.start()])
            prev_end = pragma_match.end()
        parts.append(self.content[prev_end:])
        self.content = "".join(parts)

        return len(pragmas)

    def _process_includes(self):
        """Expand all trivial #include directives inside self.content.

        Only directives outside comments and strings whose file can be
        found by the amalgamation are handled; all others are left
        untouched. Each handled directive is replaced by a ``//`` comment
        holding the directive text, followed by the processed content of
        the included file unless that file was already included. Returns
        the number of handled directives.
        """
        if len(self.content) < len("#include <x>"):
            return 0

        skippable_contexts = self._find_skippable_contexts()

        # First pass: collect the directives that should be expanded.
        includes = []
        for include_match in self.include_pattern.finditer(self.content):
            if _is_within(include_match, skippable_contexts):
                continue
            # Only the quoted form also searches the including file's
            # own directory.
            search_same_dir = include_match.group(1) == '"'
            found_included_path = self.amalgamation.find_included_file(
                include_match.group("path"),
                self.file_dir if search_same_dir else None)
            if found_included_path:
                includes.append((include_match, found_included_path))

        # Second pass: splice the included content into place.
        parts = []
        prev_end = 0
        for include_match, found_included_path in includes:
            parts.append(self.content[prev_end:include_match.start()])
            parts.append("// {0}\n".format(include_match.group(0)))
            # The membership test happens here, not in the first pass,
            # because expanding an earlier include may already have pulled
            # this file in.
            if found_included_path not in self.amalgamation.included_files:
                t = TranslationUnit(found_included_path, self.amalgamation,
                                    False)
                parts.append(t.content)
            prev_end = include_match.end()
        parts.append(self.content[prev_end:])
        self.content = "".join(parts)

        return len(includes)

    def _process(self):
        """Run all content processing steps."""
        # Root files (those from the source list) keep their pragma once.
        if not self.is_root:
            self._process_pragma_once()
        self._process_includes()

    def __init__(self, file_path, amalgamation, is_root):
        """Read and process ``file_path``.

        ``amalgamation`` supplies ``actual_path``, ``find_included_file``
        and the shared ``included_files`` list. ``is_root`` is True for
        files taken from the source list and False for included files.

        Raises IOError if the file does not exist.
        """
        self.file_path = file_path
        self.file_dir = os.path.dirname(file_path)
        self.amalgamation = amalgamation
        self.is_root = is_root

        # Registered before processing so that nested includes of this
        # same file are not expanded again.
        self.amalgamation.included_files.append(self.file_path)

        actual_path = self.amalgamation.actual_path(file_path)
        if not os.path.isfile(actual_path):
            raise IOError("File not found: \"{0}\"".format(file_path))
        with open(actual_path, 'r') as f:
            self.content = f.read()
        # Processing recurses into included files; the file is closed first
        # so nested includes do not accumulate open handles.
        self._process()
-
-
def main():
    """Parse the command line, then build and write the amalgamation."""
    usage_text = " ".join([
        "amalgamate.py",
        "[-v]",
        "-c path/to/config.json",
        "-s path/to/source/dir",
        "[-p path/to/prologue.(c|h)]"
    ])
    parser = argparse.ArgumentParser(
        description="Amalgamate C source and header files.",
        usage=usage_text)

    # (short flag, long flag, keyword settings), registered in this order.
    option_table = (
        ("-v", "--verbose", dict(
            dest="verbose", choices=["yes", "no"], metavar="",
            help="be verbose")),
        ("-c", "--config", dict(
            dest="config", required=True, metavar="",
            help="path to a JSON config file")),
        ("-s", "--source", dict(
            dest="source_path", required=True, metavar="",
            help="source code path")),
        ("-p", "--prologue", dict(
            dest="prologue", required=False, metavar="",
            help="path to a C prologue file")),
    )
    for short_flag, long_flag, settings in option_table:
        parser.add_argument(short_flag, long_flag, **settings)

    parsed_args = parser.parse_args()
    Amalgamation(parsed_args).generate()
-
-
- # Run the command-line tool only when this file is executed as a script.
- if __name__ == "__main__":
- main()
|