comment the code a bit, rename some stuff - aat - git mirror of https://ccx.te2000.cz/bzr/aat

commit 1d506245121a04ab21c83c5b2d26310703cfc3c5
parent 6d71aca625db7dea237604472baa55c9f55c84f5
Author: Jan Pobrislo <ccx@webprojekty.cz>
Date:   Wed, 14 Aug 2013 23:17:17 +0200

comment the code a bit, rename some stuff
Diffstat:
M bin/aat.awk  | 127 ++++++++++++++++++++++++++++++++++---------------------------------------------
M hello.aat  | 2 +-

2 files changed, 55 insertions(+), 74 deletions(-)
diff --git a/bin/aat.awk b/bin/aat.awk
@@ -1,19 +1,26 @@
 #!/bin/awk -f
 BEGIN {
-	tok_n = 0
-	mode = "t"
+	tok_n = 0  # counter of tokens accumulated
+
+	# token types
+	T_TEXT = 1; type_names[1] = "text"
+	T_EXPR = 2; type_names[2] = "expr"
+	T_AWK  = 3; type_names[3] = "awk"
+	tok_type = T_TEXT
 }
 
 function token(content) {
 	if(!content) return
-	if(tok_n && mode == tok_type[tok_n] && tok_content[tok_n] !~ "\n$") {
-		#if(DEBUG) printf "concat \"%s\" \"%s\"\n", tok_content[tok_n], content
-		tok_content[tok_n] = tok_content[tok_n] content
+	# concatenate tokens of the same type unless the previous one is marked finished
+	if((!tok_finished[tok_n]) && tok_n && tok_type == tok_types[tok_n]) {
+		if(DEBUG) printf "concat \"%s\" \"%s\"\n", tok_contents[tok_n], content
+		tok_contents[tok_n] = tok_contents[tok_n] content
 	} else {
-		tok_type[++tok_n] = mode
-		tok_content[tok_n] = content
+		tok_types[++tok_n] = tok_type
+		tok_contents[tok_n] = content
 	}
-	if(DEBUG) printf "token %d (%s): \"%s\"\n", tok_n, mode, tok_content[tok_n] >"/dev/stderr"
+	if(DEBUG) printf "token %d (%s): \"%s\"\n", \
+	   tok_n, type_names[tok_type], tok_contents[tok_n] >"/dev/stderr"
 }
 
 {
@@ -21,22 +28,26 @@ function token(content) {
 	while(length(line)) {
 		if(DEBUG) printf "%d: \"%s\"\n", tok_n, line >"/dev/stderr"
 		eat_nl = 0
-		if(mode == "t") {
+		if(tok_type == T_TEXT) {
+			# everything is text up to the start of an expression "{{" or of awk code "{%"
 			m = match(line, /\{[{%]/)
 			if(m) {
-				# start expression
 				token(substr(line, 1, m-1))
 				if (substr(line, m, RLENGTH) == "{{")
-					mode = "e"
+					tok_type = T_EXPR
 				else
-					mode = "a"
+					tok_type = T_AWK
 				line = substr(line, m+RLENGTH)
 			} else {
+				# no delimiter found, whole line is text
 				token(line)
 				line = ""
 			}
-		} else if(mode == "e" || mode == "a") {
-			if(mode == "e")
+		} else if(tok_type == T_EXPR || tok_type == T_AWK) {
+			# match text inside awk code or expression
+			# code stops on "%}" and expression on "}}"
+			# misses a few corner cases, which are handled in the ifs below
+			if(tok_type == T_EXPR)
 				m = match(line, /^(}?([^}"]|("([^"]|\\")*")))+/)
 			else
 				m = match(line, /^([^%"]|(%+[^}%"])|("([^"]|\\")*"))+/)
@@ -45,97 +56,67 @@ function token(content) {
 				token(substr(line, 1, RLENGTH))
 				line = substr(line, RLENGTH+1)
 			} else if(length(line) == 1) {
-				# end of line
+				# did not match whole expression because of end of line
 				token(line)
 				line = ""
 			} else if(match(line, /^%+$/)) {
+				# did not match whole code, as the above regexp fails on a sequence of "%" at EOL
 				token(line)
 				line = ""
 			} else if( \
-					(mode == "e" && substr(line, 1, 2) == "}}") || \
-					(mode == "a" && substr(line, 1, 2) == "%}") ) {
-				# end of expression
-				if(mode == "a") eat_nl = 1
-				mode = "t"
+					(tok_type == T_EXPR && substr(line, 1, 2) == "}}") || \
+					(tok_type == T_AWK && substr(line, 1, 2) == "%}") ) {
+				# end of expression / code block
+				if(tok_type == T_AWK) eat_nl = 1
+				tok_finished[tok_n] = 1
+				if(DEBUG) printf "finished (%s) \"%s\"\n", \
+					type_names[tok_types[tok_n]], tok_contents[tok_n] >"/dev/stderr"
+				tok_type = T_TEXT
 				line = substr(line, 3)
 			} else {
 				print "ERROR: could not parse line " NR ": " line >"/dev/stderr"
 				exit 1
 			}
 		} else {
-			print "ERROR: unknown mode: " mode >"/dev/stderr"
+			print "ERROR: unknown tok_type: " tok_type >"/dev/stderr"
 			exit 1
 		}
-		if(DEBUG) printf "-<%s>- \"%s\"\n", mode, line >"/dev/stderr"
+		if(DEBUG) printf "-<%s>- \"%s\"\n", type_names[tok_type], line >"/dev/stderr"
 	}
 
 	# don't add newline just after the code block
 	if(eat_nl)
 		eat_nl = 0
-	else
+	else {
 		token("\n")
-}
-
-function apply_macros(c) {
-	orig = c
-	for(man_apply_n in macros) {
-		m = macros[man_apply_n]
-		if(!match(m, /^\/([^\/]|(\\\/))+\//)) {
-			print "ERROR: invalid macro regexp: " m >"/dev/stderr"
-			exit 1
-		}
-		regexp = substr(m, 2, RLENGTH-2)
-		m = substr(m, RLENGTH+1)
-		if(!match(m, /^([^\/]|(\\\/))*\//)) {
-			print "ERROR: invalid macro replacement: " m >"/dev/stderr"
-			exit 1
-		}
-		replacement = substr(m, 1, RLENGTH-1)
-		flags = substr(m, RLENGTH)
-		# gensub() is not posix
-		if(match(flags, /g/))
-			sub_count = gsub(regexp, replacement, c)
-		else
-			sub_count = sub(regexp, replacement, c)
-		if(DEBUG && sub_count) printf "code \"%s\" matched macro /%s/, replacing with \"%s\", flags \"%s\"\n", orig, regexp, replacement, flags
-		if(sub_count) {
-			if(match(flags, /r/))
-				return apply_macros(c)
-			return c
-		}
+		if(tok_type == T_TEXT)
+			tok_finished[tok_n] = 1  # end text tokens on newlines, so we get nicer output
 	}
-	return c
 }
 
 END {
-	nl = 1
-	mac_n = 0
-	for(tok_n=1; tok_type[tok_n]; tok_n++) {
-		mode = tok_type[tok_n]
-		c = tok_content[tok_n]
-		if(nl && mode != "a") {
-			printf "%s", "printf \"%s\", "
+	nl = 1  # are we on new line?
+	for(tok_n=1; tok_types[tok_n]; tok_n++) {
+		tok_type = tok_types[tok_n]
+		c = tok_contents[tok_n]
+		if(nl && tok_type != T_AWK) {
+			printf "%s", "printf \"%s\","
 			nl = 0
 		}
-		if(mode == "t") {
+		if(tok_type == T_TEXT) {
 			linebreak = match(c, "\n$")
 			gsub(/\\/, "\\\\", c)
 			gsub(/"/,  "\\\"", c)
 			gsub(/\n/,  "\\n", c)
-			printf "\"%s\"%s", c, (linebreak ? "\n" : "")
+			printf " \"%s\"%s", c, (linebreak ? "\n" : "")
 			nl = linebreak
-		} else if(mode == "a") {
-			if(match(c, /\/([^\/]|(\\\/))+\/([^\/]|(\\\/))*\/[[:alpha:]]*/)) {
-				macros[++mac_n] = c
-			} else {
-				c = apply_macros(c)
-				printf "%s%s%s", (nl ? "" : "\n"), c, (c ~ /\n$/ ? "" : "\n")
-				nl = 1
-			}
-		} else if(mode == "e") {
-			printf "%s", c
+		} else if(tok_type == T_AWK) {
+			printf "%s%s%s", (nl ? "" : "\n"), c, (c ~ /\n$/ ? "" : "\n")
+			nl = 1
+		} else if(tok_type == T_EXPR) {
+			printf " (%s)", c
 		} else {
-			print "ERROR: unknown mode: " mode >"/dev/stderr"
+			print "ERROR: unknown tok_type: " tok_type >"/dev/stderr"
 			exit 1
 		}
 	}
diff --git a/hello.aat b/hello.aat
@@ -3,7 +3,7 @@ BEGIN {
 split("Jack Joe Jonathan", names)
 FOR name IN names
 %}
-Hello {{name (is_last ? "!" : ",")}}
+Hello {{name}}{{is_last ? "!" : ","}}
 {% ENDFOR %}
 
 Welcome to the world of {{toupper("awk")}} templating!

	aat git mirror of https://ccx.te2000.cz/bzr/aat
	git clone https://ccx.te2000.cz/git/aat
	Log \| Files \| Refs \| README

M	bin/aat.awk	\|	127	++++++++++++++++++++++++++++++++++---------------------------------------------
M	hello.aat	\|	2	+-