=== modified file 'bin/aat.awk' --- bin/aat.awk 2013-08-14 21:17:17 +0000 +++ bin/aat.awk 2013-08-14 20:02:38 +0000 @@ -1,26 +1,19 @@ #!/bin/awk -f BEGIN { - tok_n = 0 # couter of tokens accumulated - - # token types - T_TEXT = 1; type_names[1] = "text" - T_EXPR = 2; type_names[2] = "expr" - T_AWK = 3; type_names[3] = "awk" - tok_type = T_TEXT + tok_n = 0 + mode = "t" } function token(content) { if(!content) return - # concatenate tokens of same type if the previous one doesn't end in newline - if((!tok_finished[tok_n]) && tok_n && tok_type == tok_types[tok_n]) { - if(DEBUG) printf "concat \"%s\" \"%s\"\n", tok_contents[tok_n], content - tok_contents[tok_n] = tok_contents[tok_n] content + if(tok_n && mode == tok_type[tok_n] && tok_content[tok_n] !~ "\n$") { + #if(DEBUG) printf "concat \"%s\" \"%s\"\n", tok_content[tok_n], content + tok_content[tok_n] = tok_content[tok_n] content } else { - tok_types[++tok_n] = tok_type - tok_contents[tok_n] = content + tok_type[++tok_n] = mode + tok_content[tok_n] = content } - if(DEBUG) printf "token %d (%s): \"%s\"\n", \ - tok_n, type_names[tok_type], tok_contents[tok_n] >"/dev/stderr" + if(DEBUG) printf "token %d (%s): \"%s\"\n", tok_n, mode, tok_content[tok_n] >"/dev/stderr" } { @@ -28,26 +21,22 @@ while(length(line)) { if(DEBUG) printf "%d: \"%s\"\n", tok_n, line >"/dev/stderr" eat_nl = 0 - if(tok_type == T_TEXT) { - # all text until a start of expression "{{", or start of awk code "{%" + if(mode == "t") { m = match(line, /\{[{%]/) if(m) { + # start expression token(substr(line, 1, m-1)) if (substr(line, m, RLENGTH) == "{{") - tok_type = T_EXPR + mode = "e" else - tok_type = T_AWK + mode = "a" line = substr(line, m+RLENGTH) } else { - # no delimiter found, whole line is text token(line) line = "" } - } else if(tok_type == T_EXPR || tok_type == T_AWK) { - # match text inside awk code or expression - # code stops on "%}" and expression on "}}" - # misses few corner cases handled in the ifs below - if(tok_type == T_EXPR) + } else if(mode == "e" || mode == "a") { + if(mode == "e") m = match(line, /^(}?([^}"]|("([^"]|\\")*")))+/) else m = match(line, /^([^%"]|(%+[^}%"])|("([^"]|\\")*"))+/) @@ -56,67 +45,97 @@ token(substr(line, 1, RLENGTH)) line = substr(line, RLENGTH+1) } else if(length(line) == 1) { - # did not match whole expression because of end of line + # end of line token(line) line = "" } else if(match(line, /^%+$/)) { - # did not match whole code, as the above regexp fails on sequence of "%" at EOL token(line) line = "" } else if( \ - (tok_type == T_EXPR && substr(line, 1, 2) == "}}") || \ - (tok_type == T_AWK && substr(line, 1, 2) == "%}") ) { - # end of expression / code block - if(tok_type == T_AWK) eat_nl = 1 - tok_finished[tok_n] = 1 - if(DEBUG) printf "finished (%s) \"%s\"\n", \ - type_names[tok_types[tok_n]], tok_contents[tok_n] >"/dev/stderr" - tok_type = T_TEXT + (mode == "e" && substr(line, 1, 2) == "}}") || \ + (mode == "a" && substr(line, 1, 2) == "%}") ) { + # end of expression + if(mode == "a") eat_nl = 1 + mode = "t" line = substr(line, 3) } else { print "ERROR: could not parse line " NR ": " line >"/dev/stderr" exit 1 } } else { - print "ERROR: unknown tok_type: " tok_type >"/dev/stderr" + print "ERROR: unknown mode: " mode >"/dev/stderr" exit 1 } - if(DEBUG) printf "-<%s>- \"%s\"\n", type_names[tok_type], line >"/dev/stderr" + if(DEBUG) printf "-<%s>- \"%s\"\n", mode, line >"/dev/stderr" } # don't add newline just after the code block if(eat_nl) eat_nl = 0 - else { + else token("\n") - if(tok_type == T_TEXT) - tok_finished[tok_n] = 1 # end text tokens on newlines, so we get nicer output +} + +function apply_macros(c) { + orig = c + for(man_apply_n in macros) { + m = macros[man_apply_n] + if(!match(m, /^\/([^\/]|(\\\/))+\//)) { + print "ERROR: invalid macro regexp: " m >"/dev/stderr" + exit 1 + } + regexp = substr(m, 2, RLENGTH-2) + m = substr(m, RLENGTH+1) + if(!match(m, /^([^\/]|(\\\/))*\//)) { + print "ERROR: invalid macro replacement: " m >"/dev/stderr" + exit 1 + } + replacement = substr(m, 1, RLENGTH-1) + flags = substr(m, RLENGTH) + # gensub() is not posix + if(match(flags, /g/)) + sub_count = gsub(regexp, replacement, c) + else + sub_count = sub(regexp, replacement, c) + if(DEBUG && sub_count) printf "code \"%s\" matched macro /%s/, replacing with \"%s\", flags \"%s\"\n", orig, regexp, replacement, flags + if(sub_count) { + if(match(flags, /r/)) + return apply_macros(c) + return c + } } + return c } END { - nl = 1 # are we on new line? - for(tok_n=1; tok_types[tok_n]; tok_n++) { - tok_type = tok_types[tok_n] - c = tok_contents[tok_n] - if(nl && tok_type != T_AWK) { - printf "%s", "printf \"%s\"," + nl = 1 + mac_n = 0 + for(tok_n=1; tok_type[tok_n]; tok_n++) { + mode = tok_type[tok_n] + c = tok_content[tok_n] + if(nl && mode != "a") { + printf "%s", "printf \"%s\", " nl = 0 } - if(tok_type == T_TEXT) { + if(mode == "t") { linebreak = match(c, "\n$") gsub(/\\/, "\\\\", c) gsub(/"/, "\\\"", c) gsub(/\n/, "\\n", c) - printf " \"%s\"%s", c, (linebreak ? "\n" : "") + printf "\"%s\"%s", c, (linebreak ? "\n" : "") nl = linebreak - } else if(tok_type == T_AWK) { - printf "%s%s%s", (nl ? "" : "\n"), c, (c ~ /\n$/ ? "" : "\n") - nl = 1 - } else if(tok_type == T_EXPR) { - printf " (%s)", c + } else if(mode == "a") { + if(match(c, /\/([^\/]|(\\\/))+\/([^\/]|(\\\/))*\/[[:alpha:]]*/)) { + macros[++mac_n] = c + } else { + c = apply_macros(c) + printf "%s%s%s", (nl ? "" : "\n"), c, (c ~ /\n$/ ? "" : "\n") + nl = 1 + } + } else if(mode == "e") { + printf "%s", c } else { - print "ERROR: unknown tok_type: " tok_type >"/dev/stderr" + print "ERROR: unknown mode: " mode >"/dev/stderr" exit 1 } } === modified file 'hello.aat' --- hello.aat 2013-08-14 21:17:17 +0000 +++ hello.aat 2013-08-14 20:02:38 +0000 @@ -3,7 +3,7 @@ split("Jack Joe Jonathan", names) FOR name IN names %} -Hello {{name}}{{is_last ? "!" : ","}} +Hello {{name (is_last ? "!" : ",")}} {% ENDFOR %} Welcome to the world of {{toupper("awk")}} templating!