aat

git mirror of https://ccx.te2000.cz/bzr/aat
git clone https://ccx.te2000.cz/git/aat
Log | Files | Refs | README

aat.awk (9493B)


      1 #!/bin/awk -f
      2 # vim: ft=awk noet sts=4 ts=4 sw=4
      3 BEGIN {
      4 	tok_n = 0  # couter of tokens accumulated
      5 
      6 	# token types
      7 	T_TEXT = 1; type_names[1] = "text"
      8 	T_EXPR = 2; type_names[2] = "expr"
      9 	T_AWK  = 3; type_names[3] = "awk"
     10 	T_FUNC = 4; type_names[4] = "func"
     11 	tok_type = T_TEXT
     12 
     13 	# get current directory
     14 	"pwd" | getline PWD
     15 	close("pwd")
     16 
     17 	# empty the depency file
     18 	if ("AAT_DEP" in ENVIRON) {
     19 		printf "" >ENVIRON["AAT_DEP"]
     20 	}
     21 }
     22 
     23 # Append 'content' to array of tokens. Token type is taken from current value
     24 # of tok_type. It concatenates sequence of tokens of same type, unless
     25 # tok_finished is set.
     26 function token(content) {
     27 	if(!content) return
     28 	# concatenate tokens of same type if the previous one doesn't end in newline
     29 	if((!tok_finished[tok_n]) && tok_n && tok_type == tok_types[tok_n]) {
     30 		if(DEBUG) printf "concat \"%s\" \"%s\"\n", tok_contents[tok_n], content >"/dev/stderr"
     31 		tok_contents[tok_n] = tok_contents[tok_n] content
     32 	} else {
     33 		tok_types[++tok_n] = tok_type
     34 		tok_contents[tok_n] = content
     35 	}
     36 	if(DEBUG) printf "token %d (%s): \"%s\"\n", \
     37 		tok_n, type_names[tok_type], tok_contents[tok_n] >"/dev/stderr"
     38 }
     39 
     40 # Print file dependencies for makefile usage
     41 function print_dep(str) {
     42 	if(str !~ /^\//) {
     43 		str = PWD "/" str
     44 	}
     45 	if ("AAT_DEP" in ENVIRON) {
     46 		print str >>ENVIRON["AAT_DEP"]
     47 	} else {
     48 		print str >>"/dev/stderr"
     49 	}
     50 }
     51 
     52 function die(msg) {
     53 	print msg >>"/dev/stderr"
     54 	exit 1
     55 }
     56 
     57 function sh_escape(str) {
     58 	gsub(/["$\\]/, "\\&", str)
     59 	return "\"" str "\""
     60 }
     61 
     62 # find a file relative to current filename and overwrite that variable
     63 function find_file(name) {
     64 	if(DEBUG) printf "find_file(\"%s\") PWD=\"%s\"\n", name, PWD >"/dev/stderr"
     65 	# TODO: include search path
     66 	if(name ~ /^\//) {
     67 		# absolute path
     68 		filename = name
     69 	} else {
     70 		# relative path
     71 		if(match(filename, "/[^/]*$")) {
     72 			filename = substr(filename, 1, RSTART) name
     73 		} else {
     74 			filename = name
     75 		}
     76 	}
     77 
     78 	if(system("test -f " sh_escape(filename)) != 0) {
     79 		if("MAKE_CMD" in ENVIRON) {
     80 			if(system(ENVIRON["MAKE_CMD"] " " sh_escape(filename)) != 0) {
     81 				die("could not build requested file: " sh_escape(filename) " (" name ") PWD: " PWD)
     82 			}
     83 		} else {
     84 			die("could not find requested file: " sh_escape(filename) " (" name ") PWD: " PWD)
     85 		}
     86 	}
     87 
     88 	print_dep(filename)
     89 }
     90 
     91 function macro_readinto(args,    varname, fname) {
     92 	varname = fname = args
     93 	sub(/[ \t].*$/, "", varname)
     94 	sub(/^[^ \t]+[ \t]+/, "", fname)
     95 	find_file(fname)
     96 	insert_comment("start @readinto "varname" "filename" {{{")
     97 	tok_type=T_AWK
     98 	while(getline <filename) {
     99 		gsub(/["\\]/, "\\&", $0)
    100 		token(varname " = \"" $0 "\\n\"\n")
    101 	}
    102 	close(filename)
    103 	insert_comment("}}} end @readinto "varname" "filename)
    104 }
    105 
    106 function insert_comment(str,    tok_type_prev) {
    107 	tok_type_prev = tok_type
    108 	sub(/^/, "# ", str)
    109 	gsub(/\n/, "\n# ", str)
    110 	tok_type=T_AWK
    111 	token(str "\n")
    112 	tok_type = tok_type_prev
    113 }
    114 
    115 function call_macro(name, args,    file_old) {
    116 	if(DEBUG) printf "call_macro(\"%s\", \"%s\")\n", name, args >"/dev/stderr"
    117 
    118 	# store current filename so macros can change it, restore before function exits
    119 	file_old = filename
    120 
    121 	# Macro to recursively parse another template
    122 	if(name == "include"){
    123 		find_file(args)
    124 		insert_comment("start @include "filename" {{{")
    125 		while(getline <filename) {
    126 			parse_line($0)
    127 		}
    128 		close(filename)
    129 		insert_comment("}}} end @include "filename)
    130 	}
    131 
    132 	# Macro to insert another file as verbatim code
    133 	else if(name == "awk"){
    134 		find_file(args)
    135 		insert_comment("start @awk "filename" {{{")
    136 		tok_type=T_AWK
    137 		while(getline <filename) {
    138 			token($0 "\n")
    139 		}
    140 		close(filename)
    141 		insert_comment("}}} end @awk "filename)
    142 	}
    143 
    144 	# Macro to insert another file as text
    145 	else if(name == "text"){
    146 		find_file(args)
    147 		insert_comment("start @text "filename" {{{")
    148 		tok_type=T_TEXT
    149 		while(getline <filename) {
    150 			token($0 "\n")
    151 			tok_finished[tok_n] = 1  # break line
    152 		}
    153 		close(filename)
    154 		insert_comment("}}} end @text "filename)
    155 	}
    156 
    157 	# Macro to insert source filename as a variable into produced code
    158 	else if(name == "filename"){
    159 		tok_type=T_AWK
    160 		args = filename
    161 		while(match(args, "[^/]+/../") != 0) {
    162 			gsub("[^/]+/../", "", args)
    163 		}
    164 		gsub(/["\\]/, "\\&", args)
    165 		token("filename = \"" args "\"\n")
    166 	}
    167 
    168 	# Macro to read content of a file into a variable
    169 	else if(name == "readinto"){
    170 		macro_readinto(args)
    171 	}
    172 
    173 	# Assign a variable with query expression
    174 	# else if(name == "let"){
    175 	# 	if (!match(args, / *= */)) {
    176 	# 		print "ERROR: invalid let statement: " args >"/dev/stderr"
    177 	# 		exit 1
    178 	# 	}
    179 	# 	tok_type=T_AWK
    180 	# 	token("V[\"" substr(args, 1, RSTART-1) "\"] = " \
    181 	# 		  substr(args, RSTART+RLENGTH)) "\n"
    182 	# 		  # aat_process(substr(args, RSTART+RLENGTH)) "\n")
    183 	# }
    184 
    185 	# if with a query expression
    186 	else if(name == "if"){
    187 		tok_type=T_AWK
    188 		token("if(<" substr(args, RSTART+RLENGTH) ">) {\n")
    189 	}
    190 
    191 	# else if with a query expression
    192 	else if(name == "elif"){
    193 		tok_type=T_AWK
    194 		token("} else if(<" substr(args, RSTART+RLENGTH) ">) {\n")
    195 	}
    196 
    197 	# Leave the @ there for postprocessing with sed
    198 	else {
    199 		tok_type=T_AWK
    200 		token("@" name " " args "\n")
    201 	}
    202 	filename = file_old
    203 }
    204 
    205 function parse_line(line) {
    206 	# Handle linewise syntax
    207 	if(tok_type == T_TEXT) {
    208 		# if line starts with @@ or || it is actually an escape for having text
    209 		# start with single @ or | respectively
    210 		if(/^\(@@|\|\|\)/){
    211 			line=substr(line, 2)
    212 		}
    213 		# Lines starting with @ are macros. Some are handled in call_macro,
    214 		# others currently by passing it through as awk code and postprocessing
    215 		# with sed.
    216 		else if(/^@/){
    217 			match(substr(line, 2), "[^ \t]+")
    218 			call_macro( \
    219 				substr(line, 1+RSTART, RLENGTH), \
    220 				substr(line, 2+RSTART+RLENGTH) \
    221 			)
    222 			tok_type=T_TEXT
    223 			return
    224 		}
    225 		# Lines starting with | are considered verbatim awk code
    226 		else if(/^\|/) {
    227 			tok_type=T_AWK
    228 			token(substr(line, 2) "\n")
    229 			tok_type=T_TEXT
    230 			return
    231 		}
    232 	}
    233 	# Handle text, with interleaved blocks for code and expressions
    234 	while(length(line)) {
    235 		if(DEBUG) printf "%d: \"%s\"\n", tok_n, line >"/dev/stderr"
    236 		eat_nl = 0
    237 		if(tok_type == T_TEXT) {
    238 			# all text until a start of expression "{{", or start of awk code "{%"
    239 			m = match(line, /\{[{%<]/)
    240 			if(m) {
    241 				token(substr(line, 1, m-1))
    242 				if (substr(line, m, RLENGTH) == "{{")
    243 					tok_type = T_EXPR
    244 				else if (substr(line, m, RLENGTH) == "{%")
    245 					tok_type = T_AWK
    246 				else if (substr(line, m, RLENGTH) == "{<")
    247 					tok_type = T_FUNC
    248 				else { print "internal error" >"/dev/stderr"; exit 1 }
    249 				line = substr(line, m+RLENGTH)
    250 			} else {
    251 				# no delimiter found, whole line is text
    252 				token(line)
    253 				line = ""
    254 			}
    255 		} else if(tok_type == T_FUNC) {
    256 			m = match(line, />}/)
    257 			if(m) {
    258 				if(DEBUG) printf "expr match: \"%s\"\n", substr(line, m, RLENGTH) >"/dev/stderr"
    259 				token(substr(line, 1, RSTART-1))
    260 				tok_finished[tok_n] = 1
    261 				line = substr(line, RSTART+RLENGTH)
    262 				tok_type = T_TEXT
    263 			} else {
    264 				# did not match whole expression because of end of line
    265 				token(line)
    266 				line = ""
    267 			}
    268 		} else if(tok_type == T_EXPR || tok_type == T_AWK) {
    269 			# match text inside awk code or expression
    270 			# code stops on "%}" and expression on "}}"
    271 			# misses few corner cases handled in the ifs below
    272 			if(tok_type == T_EXPR)
    273 				m = match(line, /^(}?([^}"]|("([^"]|\\")*")))+/)
    274 			else
    275 				m = match(line, /^([^%"]|(%+[^}%"])|("([^"]|\\")*"))+/)
    276 			if(m) {
    277 				if(DEBUG) printf "expr match: \"%s\"\n", substr(line, m, RLENGTH) >"/dev/stderr"
    278 				token(substr(line, 1, RLENGTH))
    279 				line = substr(line, RLENGTH+1)
    280 			} else if(length(line) == 1) {
    281 				# did not match whole expression because of end of line
    282 				token(line)
    283 				line = ""
    284 			} else if(match(line, /^%+$/)) {
    285 				# did not match whole code, as the above regexp fails on sequence of "%" at EOL
    286 				token(line)
    287 				line = ""
    288 			} else if( \
    289 					(tok_type == T_EXPR && substr(line, 1, 2) == "}}") || \
    290 					(tok_type == T_AWK && substr(line, 1, 2) == "%}") ) {
    291 				# end of expression / code block
    292 				if(tok_type == T_AWK) eat_nl = 1
    293 				tok_finished[tok_n] = 1
    294 				if(DEBUG) printf "finished (%s) \"%s\"\n", \
    295 					type_names[tok_types[tok_n]], tok_contents[tok_n] >"/dev/stderr"
    296 				tok_type = T_TEXT
    297 				line = substr(line, 3)
    298 			} else {
    299 				print "ERROR: could not parse line " NR ": " line >"/dev/stderr"
    300 				exit 1
    301 			}
    302 		} else {
    303 			print "ERROR: unknown tok_type: " tok_type >"/dev/stderr"
    304 			exit 1
    305 		}
    306 		if(DEBUG) printf "-<%s>- \"%s\"\n", type_names[tok_type], line >"/dev/stderr"
    307 	}
    308 
    309 	# don't add newline just after the code block
    310 	if(eat_nl)
    311 		eat_nl = 0
    312 	else {
    313 		token("\n")
    314 		if(tok_type == T_TEXT)
    315 			tok_finished[tok_n] = 1  # end text tokens on newlines, so we get nicer output
    316 	}
    317 }
    318 
    319 # for every line in files in ARGV
    320 {
    321 	# current filename being read
    322 	filename = FILENAME
    323 
    324 	# parse the line
    325 	parse_line($0)
    326 }
    327 
    328 # print the output
    329 END {
    330 	nl = 1  # are we on new line?
    331 	for(tok_n=1; tok_types[tok_n]; tok_n++) {
    332 		tok_type = tok_types[tok_n]
    333 		c = tok_contents[tok_n]
    334 		if(nl && tok_type != T_AWK) {
    335 			printf "%s", "printf \"%s\","
    336 			nl = 0
    337 		}
    338 		if(tok_type == T_TEXT) {
    339 			linebreak = match(c, "\n$")
    340 			gsub(/\\/, "\\\\", c)
    341 			gsub(/"/,  "\\\"", c)
    342 			gsub(/\n/,  "\\n", c)
    343 			printf " \"%s\"%s", c, (linebreak ? "\n" : "")
    344 			nl = linebreak
    345 		} else if(tok_type == T_AWK) {
    346 			printf "%s%s%s", (nl ? "" : "\n"), c, (c ~ /\n$/ ? "" : "\n")
    347 			nl = 1
    348 		} else if(tok_type == T_EXPR) {
    349 			printf " (%s)", c
    350 		} else if(tok_type == T_FUNC) {
    351 			# TODO
    352 			printf " (<%s>)", c
    353 		} else {
    354 			print "ERROR: unknown tok_type: " tok_type >"/dev/stderr"
    355 			exit 1
    356 		}
    357 	}
    358 }