bin/aat.awk

#!/bin/awk -f
# vim: ft=awk noet sts=4 ts=4 sw=4
BEGIN {
	# Token type constants, plus printable names used by the debug output.
	T_TEXT = 1; T_EXPR = 2; T_AWK = 3; T_FUNC = 4
	type_names[T_TEXT] = "text"
	type_names[T_EXPR] = "expr"
	type_names[T_AWK]  = "awk"
	type_names[T_FUNC] = "func"

	# Parser state: number of tokens accumulated so far and the type that
	# newly appended content is given. Parsing starts in plain text mode.
	tok_n = 0
	tok_type = T_TEXT

	# Remember the current working directory; print_dep() uses it to turn
	# relative paths into absolute ones.
	"pwd" | getline PWD
	close("pwd")

	# Start with an empty dependency file when one is requested.
	if ("AAT_DEP" in ENVIRON)
		printf "" >ENVIRON["AAT_DEP"]
}

# Append 'content' to the array of tokens. The token type is taken from the
# current value of tok_type. Consecutive tokens of the same type are
# concatenated into a single token, unless the previous token has been marked
# in tok_finished. Empty content is ignored.
#
# Globals read:    tok_type, tok_finished, type_names, DEBUG
# Globals written: tok_n, tok_types, tok_contents
function token(content) {
	# Compare with "" instead of testing truthiness: content that looks like
	# the number zero (e.g. a template line consisting only of "0") is false
	# in a boolean context and would otherwise be silently dropped.
	if(content == "") return
	# Merge into the previous token when it exists, is not finished and has
	# the same type. Checking tok_n first avoids touching index 0.
	if(tok_n && !tok_finished[tok_n] && tok_type == tok_types[tok_n]) {
		if(DEBUG) printf "concat \"%s\" \"%s\"\n", tok_contents[tok_n], content >"/dev/stderr"
		tok_contents[tok_n] = tok_contents[tok_n] content
	} else {
		tok_types[++tok_n] = tok_type
		tok_contents[tok_n] = content
	}
	if(DEBUG) printf "token %d (%s): \"%s\"\n", \
		tok_n, type_names[tok_type], tok_contents[tok_n] >"/dev/stderr"
}

# Record 'str' as a file dependency (for makefile usage). Relative paths are
# made absolute by prefixing PWD. The path is appended to the file named by
# the AAT_DEP environment variable, or written to stderr when it is not set.
function print_dep(str) {
	if(substr(str, 1, 1) != "/")
		str = PWD "/" str

	if (!("AAT_DEP" in ENVIRON)) {
		print str >>"/dev/stderr"
		return
	}
	print str >>ENVIRON["AAT_DEP"]
}

# Report 'msg' on stderr and terminate with exit status 1.
function die(msg,    err_stream) {
	err_stream = "/dev/stderr"
	print msg >>err_stream
	exit 1
}

# Return 'str' wrapped in double quotes so that a POSIX shell reads it back
# as one literal word. Every character that stays special inside double
# quotes (", $, ` and \) is prefixed with a backslash.
function sh_escape(str) {
	# The replacement must be "\\\\&" (backslash + matched text). The shorter
	# "\\&" means a literal ampersand to gsub() and would replace the special
	# characters with "&" instead of escaping them.
	gsub(/["$`\\]/, "\\\\&", str)
	return "\"" str "\""
}

# Resolve 'name' and store the result in the global 'filename'. Absolute
# names are used as they are; relative names are looked up next to the file
# currently held in 'filename'. When the resolved file does not exist, the
# command in the MAKE_CMD environment variable (if set) is asked to build it.
# Terminates via die() when the file can neither be found nor built; on
# success the file is recorded with print_dep().
function find_file(name) {
	if(DEBUG) printf "find_file(\"%s\") PWD=\"%s\"\n", name, PWD >"/dev/stderr"
	# TODO: include search path
	if(name !~ /^\// && match(filename, "/[^/]*$")) {
		# relative path and the current file lives in a directory:
		# keep everything up to and including the last slash
		filename = substr(filename, 1, RSTART) name
	} else {
		# absolute path, or the current file has no directory part
		filename = name
	}

	if(system("test -f " sh_escape(filename)) != 0) {
		# die() does not return, so the checks below read as guard clauses
		if(!("MAKE_CMD" in ENVIRON))
			die("could not find requested file: " sh_escape(filename) " (" name ") PWD: " PWD)
		if(system(ENVIRON["MAKE_CMD"] " " sh_escape(filename)) != 0)
			die("could not build requested file: " sh_escape(filename) " (" name ") PWD: " PWD)
	}

	print_dep(filename)
}

# Implementation of "@readinto VARNAME FILE": for every line of FILE emit an
# awk statement assigning that line (plus a trailing "\n") to VARNAME.
#
# args - macro arguments: the variable name, whitespace, then the file name
#
# NOTE(review): each emitted statement is a plain assignment, so the generated
# code keeps only the last line of a multi-line file unless a later processing
# step rewrites it - confirm whether appending was intended.
function macro_readinto(args,    varname, fname, line) {
	# first word is the variable name, the remainder is the file name
	varname = fname = args
	sub(/[ \t].*$/, "", varname)
	sub(/^[^ \t]+[ \t]+/, "", fname)
	find_file(fname)
	insert_comment("start @readinto "varname" "filename" {{{")
	tok_type=T_AWK
	# Compare with > 0: getline returns -1 on a read error, which is "true"
	# and would otherwise loop forever. Reading into a local keeps $0 intact.
	while((getline line <filename) > 0) {
		# Escape for an awk string literal. "\\\\&" is backslash + matched
		# text; "\\&" would be a literal ampersand.
		gsub(/["\\]/, "\\\\&", line)
		token(varname " = \"" line "\\n\"\n")
	}
	close(filename)
	insert_comment("}}} end @readinto "varname" "filename)
}

# Emit 'str' as an awk comment token: every line of it is prefixed with "# ".
# The current token type is preserved across the call.
function insert_comment(str,    tok_type_prev) {
	tok_type_prev = tok_type
	tok_type = T_AWK
	# continuation lines get their own comment marker
	gsub(/\n/, "\n# ", str)
	token("# " str "\n")
	tok_type = tok_type_prev
}

# Handle one "@name args" macro line.
#
# name - macro name (the word directly after the "@")
# args - remainder of the line after the name
#
# Known macros: include, awk, text, filename, readinto, if, elif. Anything
# else is passed through as awk code with the leading "@" kept, for later
# postprocessing. The global 'filename' may be changed while a macro runs and
# is restored before returning; tok_type is left for the caller to reset.
function call_macro(name, args,    file_old, lead) {
	if(DEBUG) printf "call_macro(\"%s\", \"%s\")\n", name, args >"/dev/stderr"

	# store current filename so macros can change it, restore before function exits
	file_old = filename

	# Macro to recursively parse another template
	if(name == "include"){
		find_file(args)
		insert_comment("start @include "filename" {{{")
		# "> 0" stops on EOF (0) and on read errors (-1); a bare getline
		# would spin forever on -1. The line is deliberately read into $0.
		while((getline <filename) > 0) {
			parse_line($0)
		}
		close(filename)
		insert_comment("}}} end @include "filename)
	}

	# Macro to insert another file as verbatim code
	else if(name == "awk"){
		find_file(args)
		insert_comment("start @awk "filename" {{{")
		tok_type=T_AWK
		while((getline <filename) > 0) {
			token($0 "\n")
		}
		close(filename)
		insert_comment("}}} end @awk "filename)
	}

	# Macro to insert another file as text
	else if(name == "text"){
		find_file(args)
		insert_comment("start @text "filename" {{{")
		tok_type=T_TEXT
		while((getline <filename) > 0) {
			token($0 "\n")
			tok_finished[tok_n] = 1  # break line
		}
		close(filename)
		insert_comment("}}} end @text "filename)
	}

	# Macro to insert source filename as a variable into produced code
	else if(name == "filename"){
		tok_type=T_AWK
		args = filename
		# Collapse "dir/../" pairs. The dots are matched literally and the
		# directory component must be neither "." nor "..", so "../../x" and
		# two-letter directory names such as "js/" are left alone.
		while(match(args, "(^|/)([^/.]|[.][^/.]|[.][.][^/])[^/]*/[.][.]/")) {
			# keep the separator in front of the removed component, if any
			lead = (substr(args, RSTART, 1) == "/") ? "/" : ""
			args = substr(args, 1, RSTART - 1) lead substr(args, RSTART + RLENGTH)
		}
		# Escape for an awk string literal ("\\\\&" = backslash + match;
		# "\\&" would insert a literal ampersand instead).
		gsub(/["\\]/, "\\\\&", args)
		token("filename = \"" args "\"\n")
	}

	# Macro to read content of a file into a variable
	else if(name == "readinto"){
		macro_readinto(args)
	}

	# Assign a variable with query expression
	# else if(name == "let"){
	# 	if (!match(args, / *= */)) {
	# 		print "ERROR: invalid let statement: " args >"/dev/stderr"
	# 		exit 1
	# 	}
	# 	tok_type=T_AWK
	# 	token("V[\"" substr(args, 1, RSTART-1) "\"] = " \
	# 		  substr(args, RSTART+RLENGTH)) "\n"
	# 		  # aat_process(substr(args, RSTART+RLENGTH)) "\n")
	# }

	# if with a query expression
	else if(name == "if"){
		tok_type=T_AWK
		# Use the whole argument string. RSTART/RLENGTH are not set by any
		# match() in this branch, so slicing with them cut off the start of
		# the condition.
		token("if(<" args ">) {\n")
	}

	# else if with a query expression
	else if(name == "elif"){
		tok_type=T_AWK
		token("} else if(<" args ">) {\n")
	}

	# Leave the @ there for postprocessing with sed
	else {
		tok_type=T_AWK
		token("@" name " " args "\n")
	}
	filename = file_old
}

# Tokenize one template line and append the resulting tokens via token().
#
# line - the line to parse, without its trailing newline
#
# In text mode a line may be a macro ("@name args"), verbatim awk code
# ("|code"), or text with embedded "{{ expr }}", "{% code %}" and "{< func >}"
# blocks. A leading "@@" or "||" is an escape for text that starts with a
# single "@" or "|". Blocks may span lines: tok_type carries the current mode
# from one call to the next. Exits with status 1 on unparsable input.
function parse_line(line,    m, eat_nl) {
	# Handle linewise syntax
	if(tok_type == T_TEXT) {
		# if line starts with @@ or || it is actually an escape for having text
		# start with single @ or | respectively
		# (ERE grouping uses plain parentheses; "\(" would match a literal
		# parenthesis and the escape would never trigger)
		if(line ~ /^(@@|\|\|)/){
			line=substr(line, 2)
		}
		# Lines starting with @ are macros. Some are handled in call_macro,
		# others currently by passing it through as awk code and postprocessing
		# with sed.
		else if(line ~ /^@/){
			# locate the macro name; offsets are relative to the text
			# after the "@", hence the +1 / +2 adjustments below
			match(substr(line, 2), "[^ \t]+")
			call_macro( \
				substr(line, 1+RSTART, RLENGTH), \
				substr(line, 2+RSTART+RLENGTH) \
			)
			tok_type=T_TEXT
			return
		}
		# Lines starting with | are considered verbatim awk code
		else if(line ~ /^\|/) {
			tok_type=T_AWK
			token(substr(line, 2) "\n")
			tok_type=T_TEXT
			return
		}
	}
	# Handle text, with interleaved blocks for code and expressions
	while(length(line)) {
		if(DEBUG) printf "%d: \"%s\"\n", tok_n, line >"/dev/stderr"
		# only a code block that ends the line suppresses the newline
		eat_nl = 0
		if(tok_type == T_TEXT) {
			# all text until a start of expression "{{", start of awk code "{%"
			# or start of a function block "{<"
			m = match(line, /\{[{%<]/)
			if(m) {
				token(substr(line, 1, m-1))
				if (substr(line, m, RLENGTH) == "{{")
					tok_type = T_EXPR
				else if (substr(line, m, RLENGTH) == "{%")
					tok_type = T_AWK
				else if (substr(line, m, RLENGTH) == "{<")
					tok_type = T_FUNC
				else { print "internal error" >"/dev/stderr"; exit 1 }
				line = substr(line, m+RLENGTH)
			} else {
				# no delimiter found, whole line is text
				token(line)
				line = ""
			}
		} else if(tok_type == T_FUNC) {
			# function block runs until the closing ">}"
			m = match(line, />}/)
			if(m) {
				if(DEBUG) printf "expr match: \"%s\"\n", substr(line, m, RLENGTH) >"/dev/stderr"
				token(substr(line, 1, RSTART-1))
				tok_finished[tok_n] = 1
				line = substr(line, RSTART+RLENGTH)
				tok_type = T_TEXT
			} else {
				# did not match whole expression because of end of line
				token(line)
				line = ""
			}
		} else if(tok_type == T_EXPR || tok_type == T_AWK) {
			# match text inside awk code or expression
			# code stops on "%}" and expression on "}}"
			# misses few corner cases handled in the ifs below
			if(tok_type == T_EXPR)
				m = match(line, /^(}?([^}"]|("([^"]|\\")*")))+/)
			else
				m = match(line, /^([^%"]|(%+[^}%"])|("([^"]|\\")*"))+/)
			if(m) {
				if(DEBUG) printf "expr match: \"%s\"\n", substr(line, m, RLENGTH) >"/dev/stderr"
				token(substr(line, 1, RLENGTH))
				line = substr(line, RLENGTH+1)
			} else if(length(line) == 1) {
				# did not match whole expression because of end of line
				token(line)
				line = ""
			} else if(match(line, /^%+$/)) {
				# did not match whole code, as the above regexp fails on sequence of "%" at EOL
				token(line)
				line = ""
			} else if( \
					(tok_type == T_EXPR && substr(line, 1, 2) == "}}") || \
					(tok_type == T_AWK && substr(line, 1, 2) == "%}") ) {
				# end of expression / code block
				if(tok_type == T_AWK) eat_nl = 1
				tok_finished[tok_n] = 1
				if(DEBUG) printf "finished (%s) \"%s\"\n", \
					type_names[tok_types[tok_n]], tok_contents[tok_n] >"/dev/stderr"
				tok_type = T_TEXT
				line = substr(line, 3)
			} else {
				print "ERROR: could not parse line " NR ": " line >"/dev/stderr"
				exit 1
			}
		} else {
			print "ERROR: unknown tok_type: " tok_type >"/dev/stderr"
			exit 1
		}
		if(DEBUG) printf "-<%s>- \"%s\"\n", type_names[tok_type], line >"/dev/stderr"
	}

	# don't add newline just after the code block
	if(eat_nl)
		eat_nl = 0
	else {
		token("\n")
		if(tok_type == T_TEXT)
			tok_finished[tok_n] = 1  # end text tokens on newlines, so we get nicer output
	}
}

# Runs for every line of every file given in ARGV: remember which file the
# line came from (macros resolve relative paths against it), then tokenize it.
{
	filename = FILENAME; parse_line($0)
}

# Print the output: turn the accumulated tokens into awk source code.
# Text, expression and function tokens become arguments of a generated
# 'printf "%s", ...' statement; awk tokens are emitted verbatim on lines of
# their own.
END {
	nl = 1  # are we on new line?
	for(tok_n=1; tok_types[tok_n]; tok_n++) {
		tok_type = tok_types[tok_n]
		c = tok_contents[tok_n]
		# anything but verbatim code needs an open printf statement
		if(nl && tok_type != T_AWK) {
			printf "%s", "printf \"%s\","
			nl = 0
		}
		if(tok_type == T_TEXT) {
			# a text token ending in newline also ends the printf statement
			linebreak = match(c, "\n$")
			# Escape for an awk string literal. Backslashes are doubled
			# with "&&" (matched text twice): how "\\\\" is read in a
			# replacement differs between awk implementations.
			gsub(/\\/, "&&", c)
			gsub(/"/,  "\\\"", c)
			gsub(/\n/,  "\\n", c)
			printf " \"%s\"%s", c, (linebreak ? "\n" : "")
			nl = linebreak
		} else if(tok_type == T_AWK) {
			# code starts on a fresh line and always ends with a newline
			printf "%s%s%s", (nl ? "" : "\n"), c, (c ~ /\n$/ ? "" : "\n")
			nl = 1
		} else if(tok_type == T_EXPR) {
			printf " (%s)", c
		} else if(tok_type == T_FUNC) {
			# TODO
			printf " (<%s>)", c
		} else {
			print "ERROR: unknown tok_type: " tok_type >"/dev/stderr"
			exit 1
		}
	}
}