h2ext.c (10702B)
1 // $Id$ 2 3 // Copyright (C) 2007 Daniel Hokka Zakrisson <daniel@hozac.com> 4 // 5 // This program is free software; you can redistribute it and/or modify 6 // it under the terms of the GNU General Public License as published by 7 // the Free Software Foundation; either version 2, or (at your option) 8 // any later version. 9 // 10 // This program is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU General Public License for more details. 14 // 15 // You should have received a copy of the GNU General Public License 16 // along with this program; if not, write to the Free Software 17 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 19 #ifdef HAVE_CONFIG_H 20 # include <config.h> 21 #endif 22 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <unistd.h> 26 #include <fcntl.h> 27 #include <string.h> 28 #include <sys/mman.h> 29 #include <sys/types.h> 30 #include <sys/stat.h> 31 #include <getopt.h> 32 #include <errno.h> 33 #include <ctype.h> 34 #include <sys/wait.h> 35 36 #include "util.h" 37 #include "lib/internal.h" 38 #include "pathconfig.h" 39 40 #define ENSC_WRAPPERS_PREFIX "h2ext: " 41 #define ENSC_WRAPPERS_UNISTD 1 42 #define ENSC_WRAPPERS_FCNTL 1 43 #define ENSC_WRAPPERS_STAT 1 44 #include <wrappers.h> 45 46 #define MAX_PEEK_SIZE 4096 47 #define MIN(a,b) (((a) > (b)) ? (b) : (a)) 48 #define STRINGIFY_(x) #x 49 #define STRINGIFY(x) STRINGIFY_(x) 50 51 struct file_format { 52 /* where the value would be in the file */ 53 long offset; 54 /* type of match */ 55 enum { 56 FFT_STRING = 1, 57 FFT_SHORT, 58 FFT_LONG, 59 FFT_LE = 0x4000, 60 FFT_BE = 0x8000, 61 } type; 62 /* the value */ 63 union { 64 char * st; 65 uint16_t sh; 66 uint32_t lo; 67 } value; 68 /* length of the value */ 69 size_t len; 70 /* program to use for extraction */ 71 char * extractor; 72 /* should we try to process the contents as well? */ 73 int peek_inside; 74 75 struct file_format *next; 76 }; 77 typedef struct file_format file_format_t; 78 79 int wrapper_exit_code = 255; 80 81 #define CMD_HELP 0x4001 82 #define CMD_VERSION 0x4002 83 84 struct option const 85 CMDLINE_OPTIONS[] = { 86 { "help", no_argument, 0, CMD_HELP }, 87 { "version", no_argument, 0, CMD_VERSION }, 88 { "desc", required_argument, 0, 'd' }, 89 { "silent", no_argument, 0, 'q' }, 90 { 0,0,0,0 }, 91 }; 92 93 static void 94 showHelp(int fd, char const *cmd, int res) 95 { 96 WRITE_MSG(fd, "Usage:\n "); 97 WRITE_STR(fd, cmd); 98 WRITE_MSG(fd, 99 " -d <descriptions file> <file1> [<file2>...]\n\n" 100 "Please report bugs to " PACKAGE_BUGREPORT "\n"); 101 102 exit(res); 103 } 104 105 static void 106 showVersion() 107 { 108 WRITE_MSG(1, 109 "h2ext " VERSION " -- determines how to extract a file\n" 110 "This program is part of " PACKAGE_STRING "\n\n" 111 "Copyright (C) 2007 Daniel Hokka Zakrisson\n" 112 VERSION_COPYRIGHT_DISCLAIMER); 113 exit(0); 114 } 115 116 static file_format_t * 117 find_format(file_format_t *head, char *data) 118 { 119 file_format_t *i; 120 121 for (i = head; i; i = i->next) { 122 switch (i->type & ~(FFT_LE|FFT_BE)) { 123 case FFT_STRING: 124 if (memcmp(i->value.st, data + i->offset, i->len) == 0) 125 goto found; 126 break; 127 case FFT_SHORT: 128 if (i->value.sh == *((__typeof__(i->value.sh) *)data + i->offset)) 129 goto found; 130 break; 131 case FFT_LONG: 132 if (i->value.lo == *((__typeof__(i->value.lo) *)data + i->offset)) 133 goto found; 134 break; 135 } 136 } 137 found: 138 return i; 139 } 140 141 static int 142 process_file(file_format_t *head, const char *file, file_format_t *ret[2]) 143 { 144 int fd; 145 void *mapping; 146 struct stat st; 147 148 fd = EopenD(file, O_RDONLY, 0); 149 Efstat(fd, &st); 150 mapping = mmap(NULL, MIN(st.st_size, MAX_PEEK_SIZE), PROT_READ, MAP_SHARED, fd, 0); 151 if (mapping == MAP_FAILED) { 152 perror("mmap()"); 153 Eclose(fd); 154 return -1; 155 } 156 157 ret[0] = find_format(head, mapping); 158 159 munmap(mapping, MIN(st.st_size, MAX_PEEK_SIZE)); 160 161 if (ret[0] && ret[0]->peek_inside) { 162 pid_t child; 163 int fds[2]; 164 165 Elseek(fd, 0, SEEK_SET); 166 167 Epipe(fds); 168 child = Efork(); 169 if (child == 0) { 170 char *argv[3] = { PROG_H2EXT_WORKER, ret[0]->extractor, NULL }; 171 dup2(fd, 0); 172 dup2(fds[1], 1); 173 EexecvpD(PROG_H2EXT_WORKER, argv); 174 } 175 else { 176 char *buf = calloc(MAX_PEEK_SIZE, sizeof(char)), *cur, *end; 177 ssize_t bytes_read; 178 179 /* read MAX_PEEK_SIZE bytes from the decompressor */ 180 cur = buf; 181 end = buf + MAX_PEEK_SIZE; 182 while (cur < end && (bytes_read = Eread(fds[0], cur, end - cur - 1)) > 0) 183 cur += bytes_read; 184 185 /* get rid of the child */ 186 kill(child, SIGTERM); 187 wait(NULL); 188 189 ret[1] = find_format(head, buf); 190 free(buf); 191 } 192 } 193 else 194 ret[1] = NULL; 195 196 Eclose(fd); 197 198 return 0; 199 } 200 201 static inline void 202 byteswap(void *p, size_t len) 203 { 204 size_t i; 205 char *buf = p, tmp; 206 for (i = 0; i < (len >> 1); i++) { 207 tmp = buf[len - i - 1]; 208 buf[len - i - 1] = buf[i]; 209 buf[i] = tmp; 210 } 211 } 212 213 static int 214 load_description(const char *file, file_format_t **head) 215 { 216 file_format_t *prev = NULL, 217 *i = NULL; 218 int fd, 219 line_no = 0; 220 char buf[512], 221 *field, 222 *end = buf, 223 *ptr, 224 *eol; 225 ssize_t bytes_read; 226 227 fd = EopenD(file, O_RDONLY, 0); 228 229 *buf = '\0'; 230 while (1) { 231 if ((eol = strchr(buf, '\n')) == NULL && (end - buf) < (signed) (sizeof(buf) - 1)) { 232 bytes_read = Eread(fd, end, sizeof(buf) - 1 - (end - buf)); 233 /* EOF, implicit newline */ 234 if (bytes_read == 0) { 235 if (end == buf) 236 break; 237 eol = end; 238 *(end++) = '\n'; 239 } 240 end += bytes_read; 241 *end = '\0'; 242 continue; 243 } 244 else if (eol == NULL) { 245 WRITE_MSG(2, ENSC_WRAPPERS_PREFIX); 246 WRITE_STR(2, file); 247 WRITE_MSG(2, ":"); 248 WRITE_INT(2, line_no); 249 WRITE_MSG(2, " is a really long line\n"); 250 Eclose(fd); 251 return -1; 252 } 253 *eol = '\0'; 254 line_no++; 255 256 if (*buf == '#' || *buf == '\0') 257 goto new_line; 258 if (*head == NULL) 259 i = *head = calloc(1, sizeof(file_format_t)); 260 else { 261 i->next = calloc(1, sizeof(file_format_t)); 262 prev = i; 263 i = i->next; 264 } 265 i->next = NULL; 266 267 #define get_field() if (*(ptr+1) == '\0') goto new_line_and_free; \ 268 for (ptr++; *ptr == '\t' && *ptr != '\0'; ptr++); \ 269 for (field = ptr; *ptr != '\t' && *ptr != '\0'; ptr++); \ 270 *ptr = '\0'; 271 field = ptr = buf; 272 while (*ptr != '\t' && *ptr != '\0') 273 ptr++; 274 *ptr = '\0'; 275 if (field == ptr) 276 goto new_line_and_free; 277 i->offset = strtol(field, NULL, 0); 278 279 get_field(); 280 if (strcmp(field, "string") == 0) 281 i->type = FFT_STRING; 282 else if (strcmp(field, "short") == 0) 283 i->type = FFT_SHORT; 284 else if (strcmp(field, "long") == 0) 285 i->type = FFT_LONG; 286 else if (strcmp(field, "leshort") == 0) 287 i->type = FFT_SHORT|FFT_LE; 288 else if (strcmp(field, "beshort") == 0) 289 i->type = FFT_SHORT|FFT_BE; 290 else if (strcmp(field, "lelong") == 0) 291 i->type = FFT_LONG|FFT_LE; 292 else if (strcmp(field, "belong") == 0) 293 i->type = FFT_LONG|FFT_BE; 294 else { 295 WRITE_MSG(2, ENSC_WRAPPERS_PREFIX); 296 WRITE_STR(2, file); 297 WRITE_MSG(2, ":"); 298 WRITE_INT(2, line_no); 299 WRITE_MSG(2, " has an unknown type: "); 300 WRITE_STR(2, field); 301 WRITE_MSG(2, "\n"); 302 goto new_line_and_free; 303 } 304 305 get_field(); 306 switch (i->type & ~(FFT_BE|FFT_LE)) { 307 case FFT_STRING: 308 { 309 char *c, *tmp; 310 i->value.st = tmp = calloc(strlen(field) + 1, sizeof(char)); 311 for (c = field; *c; c++) { 312 if (*c == '\\') { 313 char *endptr; 314 *(tmp++) = (char)strtol(c + 1, &endptr, 8); 315 c = endptr - 1; 316 } 317 else 318 *(tmp++) = *c; 319 } 320 *tmp = '\0'; 321 i->len = tmp - i->value.st; 322 } 323 break; 324 case FFT_SHORT: 325 i->len = sizeof(i->value.sh); 326 i->value.sh = (__typeof__(i->value.sh))strtol(field, NULL, 0); 327 #if BYTE_ORDER != BIG_ENDIAN 328 if (i->type & FFT_BE) 329 #elif BYTE_ORDER != LITTLE_ENDIAN 330 if (i->type & FFT_LE) 331 #else 332 # error UNKNOWN BYTE ORDER 333 #endif 334 byteswap(&i->value.sh, i->len); 335 break; 336 case FFT_LONG: 337 i->len = sizeof(i->value.lo); 338 i->value.lo = (__typeof__(i->value.lo))strtol(field, NULL, 0); 339 #if BYTE_ORDER != BIG_ENDIAN 340 if (i->type & FFT_BE) 341 #elif BYTE_ORDER != LITTLE_ENDIAN 342 if (i->type & FFT_LE) 343 #else 344 # error UNKNOWN BYTE ORDER 345 #endif 346 byteswap(&i->value.lo, i->len); 347 break; 348 } 349 350 get_field(); 351 i->extractor = strdup(field); 352 353 get_field(); 354 i->peek_inside = (int)strtol(field, NULL, 0); 355 356 /* sanity check the entry */ 357 if (i->offset < 0) { 358 WRITE_MSG(2, ENSC_WRAPPERS_PREFIX); 359 WRITE_STR(2, file); 360 WRITE_MSG(2, ":"); 361 WRITE_INT(2, line_no); 362 WRITE_MSG(2, " has an invalid offset: "); 363 WRITE_INT(2, i->offset); 364 WRITE_MSG(2, "\n"); 365 goto new_line_and_free; 366 } 367 else if ((i->offset + i->len) > MAX_PEEK_SIZE) { 368 WRITE_MSG(2, ENSC_WRAPPERS_PREFIX); 369 WRITE_STR(2, file); 370 WRITE_MSG(2, ":"); 371 WRITE_INT(2, line_no); 372 WRITE_MSG(2, " exceeds maximum offset (" STRINGIFY(MAX_PEEK_SIZE) ")\n"); 373 goto new_line_and_free; 374 } 375 #undef get_field 376 goto new_line; 377 378 new_line_and_free: 379 free(i); 380 if (prev) { 381 i = prev; 382 free(i->next); 383 i->next = NULL; 384 } 385 else 386 *head = i = NULL; 387 new_line: 388 memmove(buf, eol + 1, end - (eol + 1)); 389 end = buf + (end - (eol + 1)); 390 } 391 392 Eclose(fd); 393 return 0; 394 } 395 396 int main(int argc, char *argv[]) 397 { 398 char **file = NULL, 399 *desc = NULL; 400 file_format_t *head = NULL; 401 int quiet = 0; 402 403 while (1) { 404 int c = getopt_long(argc, argv, "+d:q", CMDLINE_OPTIONS, 0); 405 if (c == -1) break; 406 407 switch (c) { 408 case CMD_HELP: showHelp(1, argv[0], 0); 409 case CMD_VERSION: showVersion(); 410 case 'd': desc = optarg; break; 411 case 'q': quiet = 1; break; 412 default: 413 WRITE_MSG(2, "Try '"); 414 WRITE_STR(2, argv[0]); 415 WRITE_MSG(2, " --help' for more information.\n"); 416 return wrapper_exit_code; 417 } 418 } 419 420 if (desc == NULL) { 421 WRITE_MSG(2, "No descriptions supplied, try '"); 422 WRITE_STR(2, argv[0]); 423 WRITE_MSG(2, " --help' for more information.\n"); 424 return wrapper_exit_code; 425 } 426 427 head = NULL; 428 if (load_description(desc, &head) == -1) 429 return EXIT_FAILURE; 430 431 for (file = argv + optind; *file; file++) { 432 file_format_t *formats[2]; 433 if (!quiet) { 434 WRITE_STR(1, *file); 435 WRITE_MSG(1, ": "); 436 } 437 if (!process_file(head, *file, formats) && formats[0]) { 438 WRITE_STR(1, formats[0]->extractor); 439 if (formats[0]->peek_inside) { 440 WRITE_MSG(1, " | "); 441 WRITE_STR(1, formats[1] ? formats[1]->extractor : "unknown format"); 442 } 443 } 444 else 445 WRITE_MSG(1, "unknown format"); 446 WRITE_MSG(1, "\n"); 447 } 448 449 return 0; 450 }