#! /usr/bin/env python
# -*- coding: utf-8 -*-
python_command=u'/usr/bin/env python'
encoding='utf-8'
# Copyright © 2000,2006,2007,2008,2009,2010,2011 Alberto González Palomo
# Author: Alberto González Palomo
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# LANG=en

import sys
import codecs
import string
import re
import os
import getopt
import glob
import traceback
import subprocess
import fnmatch


def display_help():
    """Print the command line usage text to stdout."""
    print(r'''
Usage: pythonpp [options] [file]...
Pre-processes a file and expands directives.

  -h, --help            Display this message.
  -s, --source          Outputs the produced python program, instead of
                        executing it.
                        This adds '.py' to the output file name.
  -d, --debug           Sets debug mode. Each appearance increments debug
                        level by one. E.g.: -ddd sets debug level to 3.
                        Level 1: Shows the file being processed and some
                                 intermediate results.
                        Level 2: Copies the produced python program to the
                                 file '/tmp/pythonpp.debug' before executing
                                 it. The output is appended to the file so
                                 that all output is available when
                                 processing several files.
  -l, --literal         Set default quoting as literal. (default)
  -i, --interpolated    Set default quoting as interpolated.
      --marker          Set interpolation marker character. Default is '$'
  -e, --execute=code    Execute 'code' as python code before processing each
                        file. File processing takes place in the same scope
                        as this code.
                        Lines can be separated with \n, but this is the only
                        escape sequence converted. I.e., \t is not converted
                        to a tab character.
      --pythonpath=path Insert path at the beginning of Python's module load
                        path, like when setting the environment variable
                        PYTHONPATH.
                        Several paths can be added separating them with a
                        colon ":", or by setting this option several times.
  -c, --command=cmd     The python executable location.
      --nowait          Do not wait for the python interpreter to finish
                        processing one file before starting with the next or
                        exiting.
                        This means that pythonpp could return before the
                        output was complete, so later commands should not
                        depend on that.
                        The default is to wait.
      --encoding=charset
                        Charset encoding of the template files.
                        Default is utf-8.
  -a, --affix=affix     (Pre|Suf)fix to remove from input file name to form
                        the output file name.
                        It's a Perl-style regular expression.
                        The default is '^template\.'.
  -C, --directory=dir   Change to directory "dir".
                        Like in "make", several occurrences of this option
                        concatenate their effects:
                        "-C / -C etc" is equivalent to "-C /etc",
                        while "-C etc -C /" is equivalent to "-C /".
  -o, --output=path     Output file or directory name.
                        If 'path' has a '/' at the end, it's prepended to
                        the normal output file names. In this case, an input
                        file name is needed to generate the output file name
                        which gets appended to 'path'.
                        If 'path' is '-', output is sent to stdout.
      --filter=pipe     Filter the output through the given pipe.
                        The pipe should not have an initial '|'.
  -m, --map=src,dst     Map the initial path src of the input file name to
                        path dst, creating all intermediate directories as
                        necessary when writing the output file.
      --depends=prereq  Build the output file only if one of the
                        prerequisites in prereq is newer.
                        prereq is a comma-separated list of path name
                        patterns as implemented in the "glob" Python module:
                        "*", "?", "[abc]" as in UNIX shells.
                        If this option is present, even if prereq is void,
                        the source file is appended to the prerequisites
                        list, so using it as "--depends=" will only process
                        the input file if it's newer than the output file.
      --depends-nested  Adds all files in the ancestor directories of each
                        input file (including its siblings) to the
                        prerequisites.
                        The top ancestor is the first path component in the
                        given input file name.
  -p, --path=dir        Insert "dir" at the beginning of the include path,
                        which is the list of directories where files
                        included with the "!<" directive (see below) are
                        searched for.
                        Several directories can be specified at once by
                        separting them with colons ":".
                        The directory where the including file resides is
                        implicitly added without having to use this option.
      --delete          Delete the generated file if it exists, instead of
                        writing it.
      --exclude=pattern Files to exclude, separated by commas.
                        Files and directories given explicitly as arguments
                        are never excluded.
                        For instance, to exclude all .svn and .cvs
                        directories and all Emacs backup files:
                          --exclude=.svn,.cvs,*~
                        The default is '*~'.

If no output file name is given, it's computed from the input file name,
by removing the affix, except when input is stdin, in which case output
is stdout.
If an input file is a directory, all files inside (recursively) that match
the affix pattern are processed.

The "quote" is the first caracter in a line (perhaps preceded by a hash '#'
or at '@' symbol which is ignored if followed by a valid "quote" directive):

  ''      Literal quoting. Outputs line as is.
  ""      Interpolated quoting. Substitutes every ${expression} by the
          expression's value. It can't contain newlines.
          There are two special cases:
            '${}' expands to '$'
            '$${expression}' expands to '${expression}'
          The default interpolation marker character '$' can be replaced
          with the --marker option.
  !       Execute the following python code line, or a special function.
          The first character after the '!' triggers some special functions:
          '<':  evaluates the rest of the line as a python expression to get
                a file name, switch input to that file, and continue when
                finished. (like the C directive #include)
                There can be no spaces between the '!' and the '<', since
                this directive does not generate any python code of its own
                and thus we can make this restriction to avoid false
                positives.
                The path is relative to the file that contains the include
                directive.
                If the path begins with a slash '/', it's relative to the
                directory from which pythonpp was started.
                The output of the included file can be captured into a
                variable by putting the variable name between "><" before
                the path as in
                  !<>varname<"/tmp/filename"
                If the variable name has the prefix '+', the output will be
                appended to the previous variable value.
          '<<': same as '<', except that a '!' character is assumed before
                each line in the included file.
          ':':  the rest of the line is evaluated as an integer, and the
                extra indentation for produced lines is set according to it.
                An unsigned number, like "1", sets the indentation to that
                value (always using spaces), and a signed value like "+1" or
                "-1" increments or decrements the indentation by that
                amount.
                By default it indents using spaces, but if a "t" is put
                after the number, the characters inserted are tabulators.
                Each tab counts as 1 character. Thus, "!:+3t" indents with
                3 tabs, and "!:-3" returns to the previous indentation,
                exactly like "!:-3t".
                The space between the '!' and the ':' sets the indentation
                for the generated python code.
          If the character after the '!' is anything else, the line is
          evaluated as python code.
  ``      Back quote. Executes this line as a shell command, and outputs its
          stdout.

If the first character in a line isn't a valid "quote" symbol, it's
processed using the default quoting.
If a line produces an empty output, the newline character is suppressed
except for lines without quote directive.
Shell quotes execute one line at a time.
Code quotes join all the lines before executing them as a program.
There can be spaces between quote pairs. This sets the indentation for the
generated python write() statement, for integration with code directives.

Pre-defined variables reachable from templates:
  - pythonpp_input_file_name     : current file name
  - pythonpp_output_indent_string: current indentation
  - pythonpp_out                 : output stream
  - pythonpp_message(text)       : function that prints a message to stderr

Report bugs to matmota@matracas.org
Last modified 2011-12-18 14:00
''')


def message(message_line):
    """Write message_line plus a newline to stderr.

    On Python 2 a unicode message is encoded as UTF-8 first; byte strings
    (and Python 3 text strings) are written unchanged.
    """
    text = message_line + '\n'
    if not isinstance(text, str):
        # Only reachable on Python 2, where "str" is the byte string type.
        text = text.encode('utf-8')
    sys.stderr.write(text)


def _compile_interpolation(marker):
    """Return the compiled regexp matching marker{expr} and marker marker{expr}."""
    # re.escape() covers every special character, not just a hand-picked set.
    return re.compile(2 * re.escape(marker) + '?{([^}\n]*)}')


def _reset_state():
    """Set all the module-level processing state to its default value.

    Called once at import time, so that the line_*() and process() helpers
    can be used on their own, and again at the start of every main() run.
    """
    global top_process_level, line_default, line_has_directive
    global interpolation_marker, interpolation
    global code_to_execute, python_path, include_path
    global output_indent_string, output_capture_variable, pending_content
    global source_file_name, source_file_line_number
    global output_filter, debug
    top_process_level = 0
    line_default = line_literal
    line_has_directive = 0
    interpolation_marker = "$"
    interpolation = _compile_interpolation(interpolation_marker)
    code_to_execute = ''
    python_path = []
    include_path = []
    output_indent_string = ''
    output_capture_variable = ''
    pending_content = ''
    source_file_name = u""
    source_file_line_number = 0
    output_filter = ''
    debug = 0


def _shell_quote(text):
    """Return text quoted as a single word for a POSIX shell."""
    return "'" + text.replace("'", "'\"'\"'") + "'"


def _report_context(input_file, pipe_command, output_file):
    """Tell the user, on stderr, which file and command were being processed."""
    message('file = \'' + source_file_name + '\'')
    message('line = ' + str(source_file_line_number))
    message('container file = \'' + input_file + '\'')
    message('pipe_command = \'' + pipe_command + '\'')
    message('output_file = \'' + output_file + '\'')


def _needs_rebuild(output_file, input_file, prerequisites, depends_nested):
    """Return True if any prerequisite is newer than the existing output_file.

    prerequisites is a list of glob patterns; the input file itself is added
    to the matches of each pattern unless the input is stdin ("-").
    With depends_nested, every regular file in the ancestor directories of
    input_file counts as a prerequisite too.
    """
    output_mtime = os.path.getmtime(output_file)

    def newer(path):
        return os.path.getmtime(path) > output_mtime

    for pattern in prerequisites:
        candidates = glob.glob(pattern)
        if input_file != "-":
            candidates.append(input_file)
        if any(newer(candidate) for candidate in candidates):
            return True
    if depends_nested and input_file != "-":
        ancestor = os.path.dirname(input_file)
        while ancestor and ancestor not in ("/", "."):
            for candidate in glob.glob(os.path.join(ancestor, "*")):
                if os.path.isfile(candidate) and newer(candidate):
                    return True
            ancestor = os.path.dirname(ancestor)
    return False


def main():
    """Command line entry point: parse sys.argv and process every input file.

    Returns 0 after displaying the help, a non-zero value on error (1 for
    problems detected here, or the exit status of the python/filter pipe),
    and None when everything was processed.
    """
    global top_process_level
    global line_default
    global interpolation_marker
    global interpolation
    global code_to_execute
    global python_command
    global encoding
    global output_filter
    global debug
    global source_file_name

    _reset_state()
    if "python_command" not in globals() or not python_command:
        raise Exception("Variable not defined: python_command")
    if "encoding" not in globals() or not encoding:
        raise Exception("Variable not defined: encoding")

    wait_until_finished = True
    output = ''
    output_source = 0
    template_file_name_affix = '^template\\.'
    path_map = {}
    prerequisites = []
    depends_nested = False
    delete = False
    exclude_patterns = ["*~"]

    try:
        opts, args = getopt.getopt(
            decode_arguments(sys.argv[1:]),
            'hsdlie:c:a:C:o:m:p:',
            ('help', 'source', 'debug', 'literal', 'interpolated',
             'marker=', 'execute=', 'command=', 'pythonpath=', 'nowait',
             'encoding=', 'affix=', 'directory=', 'output=', 'filter=',
             'map=', 'path=', 'depends=', 'depends-nested', 'delete',
             'exclude='))
    except getopt.error as problem:
        print('Command line option problem:  %s \n' % (problem,))
        display_help()
        return 1

    for o, a in opts:
        if o in ('-s', '--source'):
            output_source = 1
        elif o in ('-d', '--debug'):
            debug = debug + 1
        elif o in ('-l', '--literal'):
            line_default = line_literal
        elif o in ('-i', '--interpolated'):
            line_default = line_interpolated
        elif o == '--marker':
            interpolation_marker = a
        elif o in ('-e', '--execute'):
            code_to_execute = a
        elif o in ('-c', '--command'):
            python_command = a
        elif o == '--pythonpath':
            python_path[:0] = a.split(":")
        elif o == '--nowait':
            wait_until_finished = False
        elif o == '--encoding':
            encoding = a
        elif o in ('-a', '--affix'):
            template_file_name_affix = a
        elif o in ('-C', '--directory'):
            # Applied immediately so that successive -C options accumulate.
            os.chdir(a)
        elif o in ('-o', '--output'):
            output = a
        elif o == '--filter':
            output_filter = a
        elif o in ('-m', '--map'):
            path_map.update([a.split(",")])
        elif o in ('-p', '--path'):
            include_path[:0] = a.split(":")
        elif o == '--depends':
            prerequisites = a.split(",")
        elif o == '--depends-nested':
            depends_nested = True
        elif o == '--delete':
            delete = True
        elif o == '--exclude':
            exclude_patterns = a.split(",")
        elif o in ('-h', '--help'):
            display_help()
            return 0

    if output and path_map != {}:
        print('The options --output and --map can not be used at the same time.\n')
        display_help()
        return 1

    interpolation = _compile_interpolation(interpolation_marker)

    path_re = dict((path, re.compile(path + '(/.*)?$')) for path in path_map)
    re_affix = re.compile(template_file_name_affix)
    exclude_regexp = "|".join(fnmatch.translate(pattern)
                              for pattern in exclude_patterns)
    expand_file_names(args, re_affix, re.compile(exclude_regexp))

    for input_file in args:
        if debug:
            message("Input file: " + input_file)
        top_process_level = 1
        file_name = os.path.basename(input_file)
        file_dir = os.path.dirname(input_file)

        # Work out where the result goes; '' means "stdout".
        if output == '-':
            output_file = ''
        elif output and output[-1:] != '/':
            output_file = output
        elif input_file == '-':
            output_file = output
        else:
            output_dir = output or file_dir
            for path in path_map:
                if path_re[path].match(output_dir):
                    mapped_dir = output_dir.replace(path, path_map[path], 1)
                    if debug:
                        message("Mapping " + output_dir + " to " + mapped_dir)
                    output_dir = mapped_dir
                    break
            output_file = os.path.join(output_dir,
                                       re.sub(re_affix, '', file_name))
            if debug:
                message("Output file: '" + output_file + "'")
            if os.path.basename(output_file) == file_name:
                # Refuse to overwrite the template with its own output.
                message('Error: File name "' + input_file
                        + '" doesn\'t match affix pattern "'
                        + str(template_file_name_affix)
                        + '". Not processed.')
                return 1
            if output_dir:
                if not os.path.exists(output_dir):
                    if not delete:
                        os.makedirs(output_dir)
                elif not os.path.isdir(output_dir):
                    message('Error: The output path "' + output_dir
                            + '" is not a directory. ' + 'Not processed.')
                    return 1

        if ((prerequisites or depends_nested)
                and output_file and os.path.exists(output_file)):
            if not _needs_rebuild(output_file, input_file,
                                  prerequisites, depends_nested):
                if debug:
                    message(output_file + " is up to date.")
                continue

        if delete:
            if os.path.exists(output_file):
                message('Deleting "' + output_file + '"')
                os.remove(output_file)
            continue

        if output_source:
            # The generated program is the result: no interpreter involved.
            pipe_command = u''
            if output_file != '':
                output_file = output_file + '.py'
        else:
            pipe_command = python_command
            if output_filter:
                pipe_command += u' | ' + output_filter
            if output_file != '':
                # Quoted so that names with spaces or shell metacharacters
                # reach the shell as a single word.
                pipe_command = (pipe_command + ' >'
                                + _shell_quote(output_file))

        pythonpp_process = None
        if pipe_command != '':
            if debug > 1:
                pipe_command = (u'tee -a /tmp/pythonpp.debug | '
                                + pipe_command)
                message("pipe_command " + str(type(pipe_command)) + ": "
                        + pipe_command)
            # /bin/sh is the default, but it can't handle utf-8 file names:
            pythonpp_process = subprocess.Popen(pipe_command,
                                                executable='/bin/bash',
                                                shell=True,
                                                stdin=subprocess.PIPE)
            out = pythonpp_process.stdin
        elif output_file != '':
            out = open(output_file, 'wb')
        else:
            # Source requested on stdout. Write through a duplicate of the
            # descriptor so that closing "out" leaves sys.stdout usable for
            # the following input files.
            sys.stdout.flush()
            out = os.fdopen(os.dup(sys.stdout.fileno()), 'wb')

        try:
            out = codecs.getwriter(encoding)(out)
            if input_file == '-':
                source_file_name = ""
                process('', out)
            else:
                source_file_name = input_file
                process(input_file, out)
        except (Exception, SystemExit):
            # process() signals a failed directive through sys.exit(), so
            # SystemExit is reported here the same way as other errors.
            if debug:
                message('Error: ' + traceback.format_exc())
            else:
                error_type, error_value = sys.exc_info()[:2]
                message('Error: ' + str(error_type) + ": "
                        + str(error_value) + "\n")
            _report_context(input_file, pipe_command, output_file)
            out.close()
            return 1
        out.close()

        if pythonpp_process and wait_until_finished:
            # wait() yields the real exit status; the raw os.waitpid() status
            # (e.g. 256) would turn into 0 when used as our own exit code.
            result = pythonpp_process.wait()
            if result != 0:
                _report_context(input_file, pipe_command, output_file)
                return result
        if output_source and output_file != '':
            os.chmod(output_file, 0o750)


def interpolate_expression(matchobj):
    """Return the generated code replacing one interpolation match.

    An empty expression yields the marker itself, a doubled marker yields
    the text with one marker removed, and anything else becomes a string
    concatenation with str(expression).
    """
    expression = matchobj.group(1)
    if not expression:
        return interpolation_marker
    matched = matchobj.group(0)
    if matched[1] == interpolation_marker:
        return matched[1:]
    # Undo the escaping applied to the whole line before the substitution.
    expression = expression.replace('\\"', '"').replace('\\\\', '\\')
    return '"+str(' + expression + ')+"'


def generate_line(indent, code):
    """Return the python statement that outputs, or captures, "code"."""
    if output_capture_variable:
        line = output_capture_variable + ' += ' + code
    else:
        line = 'pythonpp_out.write(' + code + ')'
    return indent + line + '\n'


def line_literal(indent, content, output):
    """Generate the code that outputs "content" verbatim. Returns True."""
    escaped = content.replace('\\', '\\\\').replace('"', '\\"')
    code = 'pythonpp_output_indent_string + "' + escaped + '\\n"'
    output.write(generate_line(indent, code))
    return True


def line_interpolated(indent, content, output):
    """Generate the code that outputs "content" with expressions expanded.

    Returns True.
    """
    content = content.replace('\\', '\\\\').replace('"', '\\"')
    content = re.sub(interpolation, interpolate_expression, content)
    # NOTE(review): this also suppresses directive lines whose content is
    # one or two characters long, not just empty ones. Kept as found, since
    # existing templates may rely on it -- confirm the intent.
    if len(content) <= 2 and line_has_directive:
        return True
    output.write(generate_line(indent,
                               'pythonpp_output_indent_string + "'
                               + content + '\\n"'))
    return True


def line_code(indent, content, output):
    """Copy a line of python code to the generated program. Returns True.

    After a "def" line, a "global" statement is queued so that the function
    body can change the output indentation.
    """
    global pending_content
    output.write(indent + content + '\n')
    if content[:4] == 'def ' and content[-1:] == ':':
        pending_content = '!global pythonpp_output_indent_string\n'
    return True


def _is_same_file(first, second):
    """Return True if both paths exist and name the same file."""
    if not first or not second:
        return False
    try:
        return os.path.samefile(first, second)
    except OSError:
        return False


def line_include(indent, content, output, input_file_name):
    """Process an include directive ("!<", "!<<", "!<>variable<").

    content is the directive line starting at its first '<'.
    input_file_name is the file that contains the directive ('' for stdin).
    Returns True if the file was found and processed, False after reporting
    an error otherwise.
    """
    global source_file_name
    global source_file_line_number
    global output_capture_variable

    code_include = (content[1:2] == '<')
    if code_include:
        content = content[1:]

    capture_variable = ''
    if content[1:2] == '>':
        end = content.find('<', 2)
        if end >= 0:
            capture_variable = content[2:end]
            append = (capture_variable[:1] == '+')
            if append:
                capture_variable = capture_variable[1:]
            if not capture_variable:
                message('Error: empty capture variable name'
                        + ' in include directive in\n'
                        + ' \'' + source_file_name + '\'')
                return False
            if append:
                output.write(indent + "if not '" + capture_variable
                             + "' in locals().keys(): "
                             + capture_variable + " = ''\n")
            else:
                output.write(indent + capture_variable + " = ''\n")
            content = content[end:]

    try:
        # The path is a python expression written by the template author;
        # templates are trusted code, as "!" lines run arbitrary python.
        file_name = eval(content[1:])
        if isinstance(file_name, bytes):
            file_name = file_name.decode("utf-8")
        if not hasattr(file_name, 'startswith'):
            raise TypeError("the include path is not a string")
    except Exception:
        message('Error: ' + str(sys.exc_info()[0]) + ": "
                + str(sys.exc_info()[1]) + ",\n"
                + " when evaluating '" + content[1:] + "'\n"
                + ' for file inclusion in\n'
                + ' \'' + source_file_name + '\'')
        return False

    if file_name[:1] == '/':
        # A leading slash means "relative to the starting directory".
        candidates = [os.path.join(os.curdir, file_name[1:])]
    else:
        search_path = [os.path.dirname(input_file_name)] + include_path
        candidates = [os.path.join(directory, file_name)
                      for directory in search_path]
    full_file_name = candidates[-1]
    for candidate in candidates:
        if (os.path.exists(candidate)
                and not _is_same_file(input_file_name, candidate)):
            full_file_name = candidate
            break

    if _is_same_file(input_file_name, full_file_name):
        message('Error: ' + "include loop" + ": "
                + "'" + input_file_name + "' << '" + full_file_name + "'")
        return False
    if not os.path.exists(full_file_name):
        message('Error: ' + "file does not exist" + ": "
                + "'" + file_name + "',\n"
                + " in neither of:")
        for candidate in candidates:
            message(" '" + candidate + "'")
        message(' for file inclusion in\n'
                + ' \'' + source_file_name + '\'')
        return False

    if debug:
        message(u" - " + full_file_name)

    container_source_file_name = source_file_name
    container_source_file_line_number = source_file_line_number
    previous_output_capture_variable = output_capture_variable
    if capture_variable:
        output_capture_variable = capture_variable
    # Errors inside the included file must be reported against that file.
    source_file_name = full_file_name
    process(full_file_name, output, code_include)
    output_capture_variable = previous_output_capture_variable
    source_file_name = container_source_file_name
    source_file_line_number = container_source_file_line_number
    return True


def line_indent(indent, content, output, input_file_dirname):
    """Process an indentation directive ("!:N", "!:+N", "!:-N", "t" suffix).

    content is the directive line starting at the ':'.
    Updates the output indentation and generates the code that sets it.
    Returns True.
    """
    global output_indent_string
    basic_indent_unit = " "
    if content[-1:] == "t":
        basic_indent_unit = "\t"
        content = content[:-1]
    # The amount is evaluated as a python expression written by the
    # template author (trusted, like any other code line).
    amount = eval(content[1:])
    sign = content[1:2]
    if sign == "+":
        output_indent_string = output_indent_string + basic_indent_unit * amount
    elif sign == "-":
        # amount is negative here: drop that many characters from the end.
        output_indent_string = output_indent_string[0:amount]
    else:
        output_indent_string = basic_indent_unit * amount
    output.write(indent + 'pythonpp_output_indent_string = '
                 + repr(output_indent_string) + '\n')
    return True


def line_pipe(indent, content, output):
    """Generate the code that runs a shell command and outputs its stdout.

    Returns False, after reporting it, if the command is empty.
    """
    content = content.replace('\\', '\\\\')
    content = '"' + content.strip().replace('"', '\\"') + '"'
    content = re.sub(interpolation, interpolate_expression, content)
    if not content[1:-1] and line_has_directive:
        message("Malformed piped line: " + content)
        return False
    output.write(indent + 'pythonpp_pipe = subprocess.Popen(' + content
                 + ', executable="/bin/bash", shell=True,'
                 + ' stdout=subprocess.PIPE).stdout\n')
    output.write(indent + 'pythonpp_cmd_output = pythonpp_pipe.read()\n')
    output.write(generate_line(indent, 'pythonpp_cmd_output'))
    return True


def output_pending(indent, output):
    """Emit the lines queued in pending_content, then clear the queue."""
    global pending_content
    for content_line in pending_content.rstrip().split('\n'):
        quote = content_line[:1]
        if quote == "'":
            line_literal(indent, content_line[1:], output)
        elif quote == '"':
            line_interpolated(indent, content_line[1:], output)
        elif quote == '!':
            line_code(indent, content_line[1:], output)
        elif quote == '`':
            line_pipe(indent, content_line[1:], output)
        else:
            line_default(indent, content_line, output)
    pending_content = ''


# Groups: 1 quote character, 2 indentation, 3 optional second quote, 4 rest.
re_directive = re.compile(r'[#@]?([\'"!`])([ \t]*)([\'"`]?)([^\n\r]*)')


def process(input_file_name, output, code_include = False):
    """Translate one template into python code written to "output".

    input_file_name: path of the template, or '' to read stdin.
    output:          object with a write() method receiving the program.
    code_include:    if true, every line is treated as a "!" code line.

    For the top level file the program header is written first.
    Exits through sys.exit(1) if a directive could not be processed.
    """
    global line_has_directive
    global top_process_level
    global source_file_line_number

    if input_file_name:
        fpi = codecs.open(input_file_name, 'r', encoding)
    else:
        fpi = sys.stdin

    source_file_line_number = 0
    ok = True
    try:
        if top_process_level:
            output.write('#! ' + python_command + '\n')
            output.write('# -*- coding: ' + encoding.lower() + ' -*-\n')
            if code_to_execute != '':
                output.write(code_to_execute.replace('\\n', '\n') + '\n')
            output.write('import sys\n')
            if len(python_path) > 0:
                output.write('sys.path[:0] = ' + repr(python_path) + '\n')
            output.write('import os\n')
            output.write('import subprocess\n')
            output.write('pythonpp_out = sys.stdout\n')
            output.write('def pythonpp_message(message_line): sys.stderr.write(message_line + "\\n")\n')
            output.write('pythonpp_output_indent_string = ""\n')
            output.write('pythonpp_input_file_name = "' + input_file_name + '"\n')
            top_process_level = 0

        while ok:
            line = fpi.readline()
            if not line:
                break
            # "++x" is just two unary pluses in Python: count explicitly.
            source_file_line_number += 1
            if code_include:
                line = "!" + line
            matchobj = re_directive.match(line)
            if not matchobj:
                line_has_directive = 0
                if line[-1:] == '\n':
                    line = line[:-1]
                line_default('', line, output)
                continue

            line_has_directive = 1
            directive = matchobj.group(1)
            indent = matchobj.group(2)
            content = matchobj.group(4)
            second_quote = matchobj.group(3)
            if len(second_quote) > 0 and directive[-1:] != second_quote:
                # Not a quote pair: what followed belongs to the content.
                content = indent + second_quote + content
            if pending_content:
                output_pending(indent, output)

            if directive == "'":
                ok = line_literal(indent, content, output)
            elif directive == '"':
                ok = line_interpolated(indent, content, output)
            elif directive == '!':
                if indent + content[:1] == '<':
                    ok = line_include(indent, content, output,
                                      input_file_name)
                elif content[:1] == ':':
                    ok = line_indent(indent, content, output,
                                     input_file_name)
                else:
                    ok = line_code(indent, content, output)
            elif directive == '`':
                ok = line_pipe(indent, content, output)
            else:
                message('Error: illegal quote "' + directive
                        + '" in pythonpp/process()')
    finally:
        fpi.close()
    if not ok:
        sys.exit(1)


def decode_arguments(args):
    """Return the command line arguments as text, decoding UTF-8 bytes.

    Arguments that are already text (always the case on Python 3) are
    returned unchanged.
    """
    return [a.decode('utf-8') if isinstance(a, bytes) else a for a in args]


def expand_file_names(args, re_affix, re_exclude):
    """Replace, in place, every directory in args by the templates inside.

    A directory is searched recursively for files whose name matches
    re_affix and not re_exclude; sub-directories matching re_exclude are
    not entered. Arguments that are not directories are kept as given.
    An empty argument list becomes ['-'], meaning stdin.
    """
    if debug:
        message("Decoded args: " + str(args))
    if not args:
        # Decided before expanding, so that a directory without templates
        # does not make us wait for input on stdin.
        args.append('-')
    i = 0
    while i < len(args):
        input_file_name = args[i]
        if not os.path.isdir(input_file_name):
            i += 1
            continue
        new_args = []
        for root, dirs, files in os.walk(input_file_name):
            # Pruned in place so that os.walk() skips excluded directories.
            dirs[:] = [name for name in dirs if not re_exclude.match(name)]
            for name in files:
                # search(), like the re.sub() that strips the affix in
                # main(), so that suffix patterns work too.
                if re_affix.search(name) and not re_exclude.match(name):
                    new_args.append(os.path.join(root, name))
        args[i:i+1] = new_args
        # Continue right after the inserted names, skipping nothing else.
        i += len(new_args)
    if debug:
        message("Expanded file name list: " + str(args))


_reset_state()

#import profile
#profile.run('main()')

# Uncomment the main() invocation to use this file as a standalone command.
#main()