#!/bin/awk -f
# vim: ft=awk noet sts=4 ts=4 sw=4
BEGIN {
	# Token type codes, plus their printable names (used in debug output).
	T_TEXT = 1
	T_EXPR = 2
	T_AWK = 3
	T_FUNC = 4
	type_names[T_TEXT] = "text"
	type_names[T_EXPR] = "expr"
	type_names[T_AWK] = "awk"
	type_names[T_FUNC] = "func"

	# Tokenizer state: no tokens accumulated yet, input starts out as text.
	tok_n = 0
	tok_type = T_TEXT

	# Remember the current directory; print_dep() uses it to make relative
	# dependency paths absolute.
	"pwd" | getline PWD
	close("pwd")

	# Truncate the dependency file when one is requested via the environment.
	if ("AAT_DEP" in ENVIRON)
		printf "" >ENVIRON["AAT_DEP"]
}
# Append 'content' to the array of tokens (tok_types[] / tok_contents[],
# indexed 1..tok_n). The token type is taken from the current value of
# tok_type. Consecutive tokens of the same type are merged into one, unless the
# previous token has been marked as done in tok_finished[].
# Empty content is ignored.
function token(content) {
	# Compare against "" instead of using a truth test: content handed over
	# straight from an input line keeps awk's "numeric string" attribute, so a
	# template line consisting of just "0" would evaluate as false and be
	# dropped silently.
	if (content == "") return
	if (tok_n && !tok_finished[tok_n] && tok_type == tok_types[tok_n]) {
		# previous token has the same type and is still open: extend it
		if (DEBUG) printf "concat \"%s\" \"%s\"\n", tok_contents[tok_n], content >"/dev/stderr"
		tok_contents[tok_n] = tok_contents[tok_n] content
	} else {
		# start a new token
		tok_types[++tok_n] = tok_type
		tok_contents[tok_n] = content
	}
	if (DEBUG) printf "token %d (%s): \"%s\"\n", \
		tok_n, type_names[tok_type], tok_contents[tok_n] >"/dev/stderr"
}
# Record 'str' as a file dependency (for makefile usage). Relative paths are
# made absolute by prefixing PWD. The path is appended to the file named by
# the AAT_DEP environment variable, or written to stderr when AAT_DEP is unset.
function print_dep(str) {
	if (substr(str, 1, 1) != "/")
		str = PWD "/" str

	if (!("AAT_DEP" in ENVIRON)) {
		print str >>"/dev/stderr"
		return
	}
	print str >>ENVIRON["AAT_DEP"]
}
# Print 'msg' on stderr and terminate with exit code 1.
# Note: "exit" outside of END only stops input processing; awk still runs the
# END rule before the program actually ends.
function die(msg) {
	print msg >>"/dev/stderr"
	exit 1
}
# Quote 'str' for use as a single word in a shell command line.
# The result is wrapped in double quotes, and every character that stays
# special inside double quotes (", $, ` and \) is preceded by a backslash.
function sh_escape(str) {
	# The replacement must reach gsub() as \\& (literal backslash followed by
	# the matched text); a plain "\\&" reaches it as \& which means a literal
	# ampersand and would replace the special characters with "&".
	gsub(/["$`\\]/, "\\\\&", str)
	return "\"" str "\""
}
# Resolve 'name' and store the result in the global 'filename'.
# An absolute name is used as is; a relative name is looked up in the directory
# of the current 'filename' (or used unchanged when that has no directory part).
# When the file does not exist and MAKE_CMD is set in the environment, that
# command is run with the file as argument to build it. The program dies when
# the file can neither be found nor built. On success the file is recorded as a
# dependency.
function find_file(name) {
	if (DEBUG)
		printf "find_file(\"%s\") PWD=\"%s\"\n", name, PWD >"/dev/stderr"

	# TODO: include search path
	if (name !~ /^\// && match(filename, "/[^/]*$"))
		# relative path: keep everything up to and including the last slash
		filename = substr(filename, 1, RSTART) name
	else
		filename = name

	if (system("test -f " sh_escape(filename)) == 0) {
		print_dep(filename)
		return
	}

	# the file is missing: try to have it built, otherwise give up
	if (!("MAKE_CMD" in ENVIRON))
		die("could not find requested file: " sh_escape(filename) " (" name ") PWD: " PWD)
	if (system(ENVIRON["MAKE_CMD"] " " sh_escape(filename)) != 0)
		die("could not build requested file: " sh_escape(filename) " (" name ") PWD: " PWD)
	print_dep(filename)
}
# Implementation of the "@readinto VARNAME FILE" macro: emit awk code that
# assigns the content of FILE (every line terminated by "\n") to VARNAME.
#   args - macro arguments: variable name, whitespace, file name
# varname, fname, row and first are local variables.
# Uses find_file(), so the global 'filename' is changed to the resolved file.
function macro_readinto(args, varname, fname, row, first) {
	# first word is the variable name, the rest is the file name
	varname = fname = args
	sub(/[ \t].*$/, "", varname)
	sub(/^[^ \t]+[ \t]+/, "", fname)
	find_file(fname)
	insert_comment("start @readinto "varname" "filename" {{{")
	tok_type=T_AWK
	first = 1
	# "> 0" stops the loop on read errors too (getline returns -1 then, which
	# would otherwise keep the loop running forever). Reading into a variable
	# leaves $0 of the main input alone.
	while ((getline row < filename) > 0) {
		# Escape for an awk string literal. The replacement has to reach gsub()
		# as \\& (backslash followed by the matched text); "\\&" would be a
		# literal ampersand.
		gsub(/["\\]/, "\\\\&", row)
		if (first) {
			token(varname " = \"" row "\\n\"\n")
			first = 0
		} else {
			# append, so that lines after the first don't overwrite the
			# previous ones
			token(varname " = " varname " \"" row "\\n\"\n")
		}
	}
	close(filename)
	insert_comment("}}} end @readinto "varname" "filename)
}
# Add 'str' to the generated code as an awk comment: every line of it is
# prefixed with "# " and the result is stored as an awk code token.
# The current token type is preserved across the call.
# tok_type_prev is a local variable.
function insert_comment(str, tok_type_prev) {
	tok_type_prev = tok_type

	# prefix continuation lines first, then the very first line
	gsub(/\n/, "\n# ", str)
	tok_type = T_AWK
	token("# " str "\n")

	tok_type = tok_type_prev
}
# Expand a line macro ("@name args" in a template).
#   name - macro name, without the leading "@"
#   args - the rest of the macro line
# file_old and dotdot_re are local variables.
# Macros handled here: include, awk, text, filename, readinto, if, elif.
# Anything else is passed through as awk code with its "@" kept.
# The token type is left at whatever the macro set last; the caller resets it.
function call_macro(name, args, file_old, dotdot_re) {
	if(DEBUG) printf "call_macro(\"%s\", \"%s\")\n", name, args >"/dev/stderr"
	# store current filename so macros can change it, restore before function exits
	file_old = filename
	# Macro to recursively parse another template
	if(name == "include"){
		find_file(args)
		insert_comment("start @include "filename" {{{")
		# "> 0" also ends the loop on read errors (getline returns -1 then).
		# $0 is set on purpose here: it is what gets passed to parse_line().
		while((getline <filename) > 0) {
			parse_line($0)
		}
		close(filename)
		insert_comment("}}} end @include "filename)
	}
	# Macro to insert another file as verbatim code
	else if(name == "awk"){
		find_file(args)
		insert_comment("start @awk "filename" {{{")
		tok_type=T_AWK
		while((getline <filename) > 0) {
			token($0 "\n")
		}
		close(filename)
		insert_comment("}}} end @awk "filename)
	}
	# Macro to insert another file as text
	else if(name == "text"){
		find_file(args)
		insert_comment("start @text "filename" {{{")
		tok_type=T_TEXT
		while((getline <filename) > 0) {
			token($0 "\n")
			tok_finished[tok_n] = 1 # break line
		}
		close(filename)
		insert_comment("}}} end @text "filename)
	}
	# Macro to insert source filename as a variable into produced code
	else if(name == "filename"){
		tok_type=T_AWK
		args = filename
		# Collapse "dir/../" pairs. The dots are escaped so that only a real
		# ".." matches (unescaped they match any two characters), and the
		# directory part is spelled out so that it is neither "." nor "..":
		# "../../x" and "./../x" have to stay as they are.
		dotdot_re = "([^/.][^/]*|\\.[^/.][^/]*|\\.\\.[^/]+)/\\.\\./"
		while(match(args, dotdot_re)) {
			args = substr(args, 1, RSTART-1) substr(args, RSTART+RLENGTH)
		}
		# Escape for an awk string literal. The replacement has to reach gsub()
		# as \\& (backslash followed by the matched text); "\\&" would be a
		# literal ampersand.
		gsub(/["\\]/, "\\\\&", args)
		token("filename = \"" args "\"\n")
	}
	# Macro to read content of a file into a variable
	else if(name == "readinto"){
		macro_readinto(args)
	}
	# Assign a variable with query expression
	# else if(name == "let"){
	# if (!match(args, / *= */)) {
	# print "ERROR: invalid let statement: " args >"/dev/stderr"
	# exit 1
	# }
	# tok_type=T_AWK
	# token("V[\"" substr(args, 1, RSTART-1) "\"] = " \
	# substr(args, RSTART+RLENGTH)) "\n"
	# # aat_process(substr(args, RSTART+RLENGTH)) "\n")
	# }
	# if with a query expression
	else if(name == "if"){
		tok_type=T_AWK
		# The whole argument string is the condition. (RSTART/RLENGTH must not
		# be used here: they still describe the match of the macro name in the
		# caller and would cut off the start of the condition.)
		token("if(<" args ">) {\n")
	}
	# else if with a query expression
	else if(name == "elif"){
		tok_type=T_AWK
		token("} else if(<" args ">) {\n")
	}
	# Leave the @ there for postprocessing with sed
	else {
		tok_type=T_AWK
		token("@" name " " args "\n")
	}
	filename = file_old
}
# Tokenize one template line.
#   line - the line, without its trailing newline
# m and eat_nl are local variables.
# The tokenizer state (tok_type) is kept across calls, so code and expression
# blocks may span several lines. In text state the line may also be a macro
# ("@..."), verbatim awk code ("|...") or an escaped text line ("@@..." /
# "||...").
function parse_line(line, m, eat_nl) {
	# Handle linewise syntax
	if(tok_type == T_TEXT) {
		# if line starts with @@ or || it is actually an escape for having text
		# start with single @ or | respectively
		# (awk uses extended regular expressions: grouping is written with
		# plain parentheses, "\(" would match a literal parenthesis)
		if(line ~ /^(@@|\|\|)/){
			line=substr(line, 2)
		}
		# Lines starting with @ are macros. Some are handled in call_macro,
		# others currently by passing it through as awk code and postprocessing
		# with sed.
		else if(line ~ /^@/){
			# the macro name is the first word after "@"; the arguments start
			# one character after the name
			match(substr(line, 2), "[^ \t]+")
			call_macro( \
				substr(line, 1+RSTART, RLENGTH), \
				substr(line, 2+RSTART+RLENGTH) \
			)
			tok_type=T_TEXT
			return
		}
		# Lines starting with | are considered verbatim awk code
		else if(line ~ /^\|/) {
			tok_type=T_AWK
			token(substr(line, 2) "\n")
			tok_type=T_TEXT
			return
		}
	}
	# Handle text, with interleaved blocks for code and expressions
	while(length(line)) {
		if(DEBUG) printf "%d: \"%s\"\n", tok_n, line >"/dev/stderr"
		eat_nl = 0
		if(tok_type == T_TEXT) {
			# all text until a start of expression "{{", start of awk code "{%"
			# or start of a function block "{<"
			m = match(line, /\{[{%<]/)
			if(m) {
				token(substr(line, 1, m-1))
				if (substr(line, m, RLENGTH) == "{{")
					tok_type = T_EXPR
				else if (substr(line, m, RLENGTH) == "{%")
					tok_type = T_AWK
				else if (substr(line, m, RLENGTH) == "{<")
					tok_type = T_FUNC
				else { print "internal error" >"/dev/stderr"; exit 1 }
				line = substr(line, m+RLENGTH)
			} else {
				# no delimiter found, whole line is text
				token(line)
				line = ""
			}
		} else if(tok_type == T_FUNC) {
			# function block ends at ">}"
			m = match(line, />}/)
			if(m) {
				if(DEBUG) printf "expr match: \"%s\"\n", substr(line, m, RLENGTH) >"/dev/stderr"
				token(substr(line, 1, RSTART-1))
				tok_finished[tok_n] = 1
				line = substr(line, RSTART+RLENGTH)
				tok_type = T_TEXT
			} else {
				# did not match whole expression because of end of line
				token(line)
				line = ""
			}
		} else if(tok_type == T_EXPR || tok_type == T_AWK) {
			# match text inside awk code or expression
			# code stops on "%}" and expression on "}}"
			# misses few corner cases handled in the ifs below
			if(tok_type == T_EXPR)
				m = match(line, /^(}?([^}"]|("([^"]|\\")*")))+/)
			else
				m = match(line, /^([^%"]|(%+[^}%"])|("([^"]|\\")*"))+/)
			if(m) {
				if(DEBUG) printf "expr match: \"%s\"\n", substr(line, m, RLENGTH) >"/dev/stderr"
				token(substr(line, 1, RLENGTH))
				line = substr(line, RLENGTH+1)
			} else if(length(line) == 1) {
				# did not match whole expression because of end of line
				token(line)
				line = ""
			} else if(match(line, /^%+$/)) {
				# did not match whole code, as the above regexp fails on sequence of "%" at EOL
				token(line)
				line = ""
			} else if( \
				(tok_type == T_EXPR && substr(line, 1, 2) == "}}") || \
				(tok_type == T_AWK && substr(line, 1, 2) == "%}") ) {
				# end of expression / code block
				if(tok_type == T_AWK) eat_nl = 1
				tok_finished[tok_n] = 1
				if(DEBUG) printf "finished (%s) \"%s\"\n", \
					type_names[tok_types[tok_n]], tok_contents[tok_n] >"/dev/stderr"
				tok_type = T_TEXT
				line = substr(line, 3)
			} else {
				print "ERROR: could not parse line " NR ": " line >"/dev/stderr"
				exit 1
			}
		} else {
			print "ERROR: unknown tok_type: " tok_type >"/dev/stderr"
			exit 1
		}
		if(DEBUG) printf "-<%s>- \"%s\"\n", type_names[tok_type], line >"/dev/stderr"
	}
	# don't add newline just after the code block
	# (eat_nl is only still set when "%}" was the last thing on the line)
	if(eat_nl)
		eat_nl = 0
	else {
		token("\n")
		if(tok_type == T_TEXT)
			tok_finished[tok_n] = 1 # end text tokens on newlines, so we get nicer output
	}
}
# Main rule: runs for every line of the files given in ARGV.
{
	# Name of the template being read; find_file() resolves relative paths
	# against it.
	filename = FILENAME

	# Tokenize the line.
	parse_line($0)
}
# Print the output: turn the accumulated tokens into awk source code.
# Text and expressions become arguments of a generated printf statement
# (text as string literals, expressions in parentheses); awk code tokens are
# copied through verbatim on lines of their own.
END {
	nl = 1 # are we on new line?
	for(tok_n=1; tok_types[tok_n]; tok_n++) {
		tok_type = tok_types[tok_n]
		c = tok_contents[tok_n]
		# anything but code needs a printf statement to be part of
		if(nl && tok_type != T_AWK) {
			printf "%s", "printf \"%s\","
			nl = 0
		}
		if(tok_type == T_TEXT) {
			# text ending in a newline also ends the generated statement
			linebreak = match(c, "\n$")
			# Escape for an awk string literal. To double the backslashes the
			# replacement has to reach gsub() as \\& (backslash followed by the
			# matched backslash); a replacement of just \\ yields a single
			# backslash under the POSIX rules, leaving the text unescaped.
			gsub(/\\/, "\\\\&", c)
			gsub(/"/, "\\\"", c)
			gsub(/\n/, "\\n", c)
			printf " \"%s\"%s", c, (linebreak ? "\n" : "")
			nl = linebreak
		} else if(tok_type == T_AWK) {
			# code starts on a fresh line and always ends with a newline
			printf "%s%s%s", (nl ? "" : "\n"), c, (c ~ /\n$/ ? "" : "\n")
			nl = 1
		} else if(tok_type == T_EXPR) {
			printf " (%s)", c
		} else if(tok_type == T_FUNC) {
			# TODO
			printf " (<%s>)", c
		} else {
			print "ERROR: unknown tok_type: " tok_type >"/dev/stderr"
			exit 1
		}
	}
}