aat.awk (9493B)
#!/bin/awk -f
# vim: ft=awk noet sts=4 ts=4 sw=4
#
# Template preprocessor: reads template lines, splits them into tokens of
# four kinds (text, expression, awk code, "func" query) and, at END, prints
# an awk program in which text and expressions become printf arguments and
# awk code is emitted verbatim.
#
# Environment variables read:
#   AAT_DEP   if set, file that receives the list of files the template
#             depends on (truncated at start-up); otherwise the list goes
#             to stderr
#   MAKE_CMD  if set, command run to build a requested file that is missing
# Setting the awk variable DEBUG (e.g. -v DEBUG=1) traces the tokenizer on
# stderr.

BEGIN {
	tok_n = 0	# counter of tokens accumulated
	failed = 0	# set by die(); makes END skip printing partial output

	# token types
	T_TEXT = 1; type_names[1] = "text"
	T_EXPR = 2; type_names[2] = "expr"
	T_AWK = 3; type_names[3] = "awk"
	T_FUNC = 4; type_names[4] = "func"
	tok_type = T_TEXT

	# get current directory
	"pwd" | getline PWD
	close("pwd")

	# empty the dependency file
	if ("AAT_DEP" in ENVIRON) {
		printf "" >ENVIRON["AAT_DEP"]
	}
}

# Append 'content' to the array of tokens. The token type is taken from the
# current value of tok_type. A token is concatenated onto the previous one
# when both have the same type and the previous one is not marked in
# tok_finished.
function token(content) {
	# Compare against "" instead of testing truthiness: an input line such
	# as "0" is a numeric string whose value is false, and would otherwise
	# be silently dropped.
	if (content == "") return
	if ((!tok_finished[tok_n]) && tok_n && tok_type == tok_types[tok_n]) {
		if (DEBUG) printf "concat \"%s\" \"%s\"\n", tok_contents[tok_n], content >"/dev/stderr"
		tok_contents[tok_n] = tok_contents[tok_n] content
	} else {
		tok_types[++tok_n] = tok_type
		tok_contents[tok_n] = content
	}
	if (DEBUG) printf "token %d (%s): \"%s\"\n", \
		tok_n, type_names[tok_type], tok_contents[tok_n] >"/dev/stderr"
}

# Print a file dependency for makefile usage. Relative paths are prefixed
# with PWD. Output goes to the file named by AAT_DEP, or to stderr when that
# variable is not set.
function print_dep(str) {
	if (str !~ /^\//) {
		str = PWD "/" str
	}
	if ("AAT_DEP" in ENVIRON) {
		print str >>ENVIRON["AAT_DEP"]
	} else {
		print str >>"/dev/stderr"
	}
}

# Report a fatal error on stderr and exit with status 1. 'exit' inside an
# action or function still runs the END rule, so the 'failed' flag tells END
# not to print a half-built program.
function die(msg) {
	print msg >>"/dev/stderr"
	failed = 1
	exit 1
}

# Return 'str' wrapped in double quotes for use in a shell command line.
# Characters that stay special inside double quotes (", $, ` and \) are
# backslash-escaped. The replacement must be "\\\\&" (backslash followed by
# the matched text); "\\&" would be seen by gsub as \& and insert a
# literal "&".
function sh_escape(str) {
	gsub(/["$`\\]/, "\\\\&", str)
	return "\"" str "\""
}

# Resolve 'name' relative to the directory of the current 'filename' and
# store the result back into the global 'filename'. Absolute names are taken
# as-is. If the file does not exist, MAKE_CMD (when set) is run to build it;
# failure to find or build the file is fatal. The resolved file is recorded
# as a dependency.
function find_file(name) {
	if (DEBUG) printf "find_file(\"%s\") PWD=\"%s\"\n", name, PWD >"/dev/stderr"
	# TODO: include search path
	if (name ~ /^\//) {
		# absolute path
		filename = name
	} else {
		# relative path
		if (match(filename, "/[^/]*$")) {
			filename = substr(filename, 1, RSTART) name
		} else {
			filename = name
		}
	}

	if (system("test -f " sh_escape(filename)) != 0) {
		if ("MAKE_CMD" in ENVIRON) {
			if (system(ENVIRON["MAKE_CMD"] " " sh_escape(filename)) != 0) {
				die("could not build requested file: " sh_escape(filename) " (" name ") PWD: " PWD)
			}
		} else {
			die("could not find requested file: " sh_escape(filename) " (" name ") PWD: " PWD)
		}
	}

	print_dep(filename)
}

# @readinto VARNAME FILE
# Emit awk code that assigns the content of FILE to VARNAME. The variable is
# first reset to "" and then every line is appended, so the whole file ends
# up in the variable (a plain assignment per line would keep only the last
# line).
function macro_readinto(args,    varname, fname, path, ln) {
	varname = fname = args
	sub(/[ \t].*$/, "", varname)
	sub(/^[^ \t]+[ \t]+/, "", fname)
	find_file(fname)
	path = filename
	insert_comment("start @readinto "varname" "path" {{{")
	tok_type = T_AWK
	token(varname " = \"\"\n")
	# getline returns -1 on error; testing "> 0" avoids an endless loop
	while ((getline ln <path) > 0) {
		gsub(/["\\]/, "\\\\&", ln)
		token(varname " = " varname " \"" ln "\\n\"\n")
	}
	close(path)
	insert_comment("}}} end @readinto "varname" "path)
}

# Emit 'str' as an awk comment token ("# " is put in front of every line),
# leaving tok_type as it was on entry.
function insert_comment(str,    tok_type_prev) {
	tok_type_prev = tok_type
	sub(/^/, "# ", str)
	gsub(/\n/, "\n# ", str)
	tok_type = T_AWK
	token(str "\n")
	tok_type = tok_type_prev
}

# Handle a line of the form "@name args". Known macros are expanded here;
# anything else is passed through as awk code with the "@" kept, for later
# postprocessing with sed. Macros may change the global 'filename'; it is
# restored before returning.
function call_macro(name, args,    file_old, path, ln) {
	if (DEBUG) printf "call_macro(\"%s\", \"%s\")\n", name, args >"/dev/stderr"

	# store current filename so macros can change it, restore before function exits
	file_old = filename

	# Macro to recursively parse another template
	if (name == "include") {
		find_file(args)
		# keep a private copy: nested macros touch the global 'filename'
		path = filename
		insert_comment("start @include "path" {{{")
		while ((getline ln <path) > 0) {
			parse_line(ln)
		}
		close(path)
		insert_comment("}}} end @include "path)
	}

	# Macro to insert another file as verbatim code
	else if (name == "awk") {
		find_file(args)
		path = filename
		insert_comment("start @awk "path" {{{")
		tok_type = T_AWK
		while ((getline ln <path) > 0) {
			token(ln "\n")
		}
		close(path)
		insert_comment("}}} end @awk "path)
	}

	# Macro to insert another file as text
	else if (name == "text") {
		find_file(args)
		path = filename
		insert_comment("start @text "path" {{{")
		tok_type = T_TEXT
		while ((getline ln <path) > 0) {
			token(ln "\n")
			tok_finished[tok_n] = 1		# break line
		}
		close(path)
		insert_comment("}}} end @text "path)
	}

	# Macro to insert source filename as a variable into produced code
	else if (name == "filename") {
		tok_type = T_AWK
		args = filename
		# Collapse "dir/../" pairs. The dots are escaped so that only a
		# real ".." component matches, not any two-character directory.
		# NOTE: a leading "../../" is still collapsed too, as before.
		while (match(args, "[^/]+/\\.\\./")) {
			sub("[^/]+/\\.\\./", "", args)
		}
		gsub(/["\\]/, "\\\\&", args)
		token("filename = \"" args "\"\n")
	}

	# Macro to read content of a file into a variable
	else if (name == "readinto") {
		macro_readinto(args)
	}

	# TODO: "@let" macro (assign a variable from a query expression) is not
	# implemented.

	# if with a query expression; the whole argument string is the condition
	else if (name == "if") {
		tok_type = T_AWK
		token("if(<" args ">) {\n")
	}

	# else if with a query expression
	else if (name == "elif") {
		tok_type = T_AWK
		token("} else if(<" args ">) {\n")
	}

	# Leave the @ there for postprocessing with sed
	else {
		tok_type = T_AWK
		token("@" name " " args "\n")
	}
	filename = file_old
}

# Tokenize one template line. The tokenizer state (tok_type) persists across
# calls, so expressions and code blocks may span several lines.
function parse_line(line,    m, delim, eat_nl) {
	# Handle linewise syntax
	if (tok_type == T_TEXT) {
		# if line starts with @@ or || it is actually an escape for having text
		# start with single @ or | respectively
		if (line ~ /^(@@|\|\|)/) {
			line = substr(line, 2)
		}
		# Lines starting with @ are macros. Some are handled in call_macro,
		# others currently by passing it through as awk code and postprocessing
		# with sed.
		else if (line ~ /^@/) {
			match(substr(line, 2), "[^ \t]+")
			call_macro( \
				substr(line, 1+RSTART, RLENGTH), \
				substr(line, 2+RSTART+RLENGTH) \
			)
			tok_type = T_TEXT
			return
		}
		# Lines starting with | are considered verbatim awk code
		else if (line ~ /^\|/) {
			tok_type = T_AWK
			token(substr(line, 2) "\n")
			tok_type = T_TEXT
			return
		}
	}
	# Handle text, with interleaved blocks for code and expressions
	while (length(line)) {
		if (DEBUG) printf "%d: \"%s\"\n", tok_n, line >"/dev/stderr"
		eat_nl = 0
		if (tok_type == T_TEXT) {
			# all text until a start of expression "{{", start of awk
			# code "{%" or start of a func block "{<"
			m = match(line, /\{[{%<]/)
			if (m) {
				token(substr(line, 1, m-1))
				delim = substr(line, m, RLENGTH)
				if (delim == "{{")
					tok_type = T_EXPR
				else if (delim == "{%")
					tok_type = T_AWK
				else if (delim == "{<")
					tok_type = T_FUNC
				else
					die("internal error")
				line = substr(line, m+RLENGTH)
			} else {
				# no delimiter found, whole line is text
				token(line)
				line = ""
			}
		} else if (tok_type == T_FUNC) {
			m = match(line, />}/)
			if (m) {
				if (DEBUG) printf "expr match: \"%s\"\n", substr(line, m, RLENGTH) >"/dev/stderr"
				token(substr(line, 1, RSTART-1))
				tok_finished[tok_n] = 1
				line = substr(line, RSTART+RLENGTH)
				tok_type = T_TEXT
			} else {
				# did not match whole expression because of end of line
				token(line)
				line = ""
			}
		} else if (tok_type == T_EXPR || tok_type == T_AWK) {
			# match text inside awk code or expression
			# code stops on "%}" and expression on "}}"
			# misses few corner cases handled in the ifs below
			if (tok_type == T_EXPR)
				m = match(line, /^(}?([^}"]|("([^"]|\\")*")))+/)
			else
				m = match(line, /^([^%"]|(%+[^}%"])|("([^"]|\\")*"))+/)
			if (m) {
				if (DEBUG) printf "expr match: \"%s\"\n", substr(line, m, RLENGTH) >"/dev/stderr"
				token(substr(line, 1, RLENGTH))
				line = substr(line, RLENGTH+1)
			} else if (length(line) == 1) {
				# did not match whole expression because of end of line
				token(line)
				line = ""
			} else if (match(line, /^%+$/)) {
				# did not match whole code, as the above regexp fails on sequence of "%" at EOL
				token(line)
				line = ""
			} else if ( \
				(tok_type == T_EXPR && substr(line, 1, 2) == "}}") || \
				(tok_type == T_AWK && substr(line, 1, 2) == "%}") ) {
				# end of expression / code block
				if (tok_type == T_AWK) eat_nl = 1
				tok_finished[tok_n] = 1
				if (DEBUG) printf "finished (%s) \"%s\"\n", \
					type_names[tok_types[tok_n]], tok_contents[tok_n] >"/dev/stderr"
				tok_type = T_TEXT
				line = substr(line, 3)
			} else {
				die("ERROR: could not parse line " NR ": " line)
			}
		} else {
			die("ERROR: unknown tok_type: " tok_type)
		}
		if (DEBUG) printf "-<%s>- \"%s\"\n", type_names[tok_type], line >"/dev/stderr"
	}

	# don't add newline just after the code block
	if (!eat_nl) {
		token("\n")
		if (tok_type == T_TEXT)
			tok_finished[tok_n] = 1		# end text tokens on newlines, so we get nicer output
	}
}

# for every line in files in ARGV
{
	# current filename being read
	filename = FILENAME

	# parse the line
	parse_line($0)
}

# print the output
END {
	# a fatal error was already reported; do not emit a partial program
	if (failed) exit 1

	nl = 1	# are we on new line?
	tok_count = tok_n
	for (i = 1; i <= tok_count; i++) {
		tok_type = tok_types[i]
		c = tok_contents[i]
		if (nl && tok_type != T_AWK) {
			printf "%s", "printf \"%s\","
			nl = 0
		}
		if (tok_type == T_TEXT) {
			linebreak = match(c, "\n$")
			# "\\\\&" = backslash + matched backslash, i.e. a doubled
			# backslash in every awk; a bare "\\\\" replacement yields a
			# single backslash under POSIX sub/gsub rules.
			gsub(/\\/, "\\\\&", c)
			gsub(/"/, "\\\"", c)
			gsub(/\n/, "\\n", c)
			printf " \"%s\"%s", c, (linebreak ? "\n" : "")
			nl = linebreak
		} else if (tok_type == T_AWK) {
			printf "%s%s%s", (nl ? "" : "\n"), c, (c ~ /\n$/ ? "" : "\n")
			nl = 1
		} else if (tok_type == T_EXPR) {
			printf " (%s)", c
		} else if (tok_type == T_FUNC) {
			# TODO
			printf " (<%s>)", c
		} else {
			die("ERROR: unknown tok_type: " tok_type)
		}
	}
}