vhashify.c (17956B)
1 // $Id$ --*- c -*-- 2 3 // Copyright (C) 2005 Enrico Scholz <enrico.scholz@informatik.tu-chemnitz.de> 4 // 5 // This program is free software; you can redistribute it and/or modify 6 // it under the terms of the GNU General Public License as published by 7 // the Free Software Foundation; version 2 of the License. 8 // 9 // This program is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU General Public License for more details. 13 // 14 // You should have received a copy of the GNU General Public License 15 // along with this program; if not, write to the Free Software 16 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 18 19 #ifdef HAVE_CONFIG_H 20 # include <config.h> 21 #endif 22 23 #define UTIL_VSERVER_UNIFY_MTIME_OPTIONAL 24 25 #include "vhashify.h" 26 #include "util.h" 27 28 #include "lib/internal.h" 29 #include "lib_internal/matchlist.h" 30 #include "lib_internal/unify.h" 31 #include "ensc_vector/vector.h" 32 33 #include "lib_internal/crypto-wrapper.h" 34 35 #include <setjmp.h> 36 #include <unistd.h> 37 #include <getopt.h> 38 #include <string.h> 39 #include <assert.h> 40 #include <stdlib.h> 41 #include <fcntl.h> 42 #include <dirent.h> 43 #include <errno.h> 44 #include <signal.h> 45 #include <limits.h> 46 #include <sys/mman.h> 47 #include <sys/stat.h> 48 49 #define ENSC_WRAPPERS_STDLIB 1 50 #define ENSC_WRAPPERS_UNISTD 1 51 #define ENSC_WRAPPERS_FCNTL 1 52 #define ENSC_WRAPPERS_DIRENT 1 53 #define ENSC_WRAPPERS_IO 1 54 #include <wrappers.h> 55 56 57 #define HASH_BLOCKSIZE 0x10000000u 58 #define HASH_MINSIZE 0x10 59 #define HASH_MAXBITS 256 // we have to take care about 60 // max filename-length... 61 62 #if HASH_MINSIZE<=0 63 # error HASH_MINSIZE must be not '0' 64 #endif 65 66 67 #define CMD_HELP 0x8000 68 #define CMD_VERSION 0x8001 69 70 #define CMD_DESTINATION 0x1000 71 #define CMD_INSECURE 0x1001 72 #define CMD_SLEDGE 0x1002 73 #define CMD_MANUALLY 0x1003 74 #define CMD_REFRESH 0x1004 75 #define CMD_NOMTIME 0x1005 76 77 struct option const 78 CMDLINE_OPTIONS[] = { 79 { "help", no_argument, 0, CMD_HELP }, 80 { "version", no_argument, 0, CMD_VERSION }, 81 { "destination", required_argument, 0, CMD_DESTINATION }, 82 { "insecure", no_argument, 0, CMD_INSECURE }, 83 { "sledgehammer", no_argument, 0, CMD_SLEDGE }, 84 { "manually", no_argument, 0, CMD_MANUALLY }, 85 { "refresh", no_argument, 0, CMD_REFRESH }, 86 { "ignore-mtime", no_argument, 0, CMD_NOMTIME }, 87 { "dry-run", no_argument, 0, 'n' }, 88 { "verbose", no_argument, 0, 'v' }, 89 { 0,0,0,0 } 90 }; 91 92 // hash digest grouped by 2 digits + hash-collision counter + 2* '/' + NULL 93 typedef char HashPath[HASH_MAXBITS/4 + (HASH_MAXBITS/4/2) + 94 sizeof(unsigned int)*2 + 3]; 95 96 struct HashDirConfiguration 97 { 98 ensc_hash_method const *method; 99 enum { hshALL=0, hshSTART = 1, hshMIDDLE=2, 100 hshEND = 4, hshINVALID = -1 } blocks; 101 size_t blocksize; 102 }; 103 104 struct WalkdownInfo 105 { 106 PathInfo state; 107 struct MatchList dst_list; 108 struct HashDirConfiguration hash_conf; 109 HashDirCollection hash_dirs; 110 size_t hash_dirs_max_size; 111 112 ensc_hash_context hash_context; 113 }; 114 115 int wrapper_exit_code = 1; 116 struct Arguments const *global_args; 117 static struct SkipReason skip_reason; 118 119 struct WalkdownInfo global_info = { 120 .hash_conf = { .method = 0, 121 .blocks = hshALL, 122 .blocksize = 0x10000 } 123 }; 124 125 #include "vhashify-init.hc" 126 127 int Global_getVerbosity() { 128 return global_args->verbosity; 129 } 130 131 int Global_doRenew() { 132 return true; 133 } 134 135 int Global_isVserverRunning() { 136 // TODO 137 return global_args->insecure<2; 138 } 139 140 static void 141 showHelp(char const *cmd) 142 { 143 WRITE_MSG(1, "Usage:\n "); 144 WRITE_STR(1, cmd); 145 WRITE_MSG(1, 146 " [-nv] [--refresh] <vserver>\n or\n "); 147 WRITE_STR(1, cmd); 148 WRITE_MSG(1, 149 " --manually [-nv] [--] <hashdir> <path> <excludelist>\n\n" 150 " --manually ... hashify generic paths; excludelists must be generated\n" 151 " manually\n" 152 " --refresh ... hashify already hashified files also\n" 153 " -n ... do not modify anything; just show what there will be\n" 154 " done (in combination with '-v')\n" 155 " -v ... verbose mode\n" 156 "Please report bugs to " PACKAGE_BUGREPORT "\n"); 157 158 exit(0); 159 } 160 161 static void 162 showVersion() 163 { 164 WRITE_MSG(1, 165 "vhashify " VERSION " -- hashifies vservers and/or directories\n" 166 "This program is part of " PACKAGE_STRING "\n\n" 167 "Copyright (C) 2005 Enrico Scholz\n" 168 VERSION_COPYRIGHT_DISCLAIMER); 169 exit(0); 170 } 171 172 int 173 HashDirInfo_compareDevice(void const *lhs_v, void const *rhs_v) 174 { 175 struct HashDirInfo const * const lhs = lhs_v; 176 dev_t const * const rhs = rhs_v; 177 178 assert(lhs!=0 && rhs!=0); 179 return lhs->device - *rhs; 180 } 181 182 PathInfo const * 183 HashDirInfo_findDevice(HashDirCollection const *coll, dev_t dev) 184 { 185 struct HashDirInfo const *res; 186 187 res = Vector_searchSelfOrg_const(coll, &dev, 188 HashDirInfo_compareDevice, vecSHIFT_ONCE); 189 190 if (res!=0) return &res->path; 191 else return 0; 192 } 193 194 #include "vserver-visitdir.hc" 195 196 static bool 197 checkFstat(PathInfo const * const basename, 198 struct stat * const st) 199 { 200 assert(basename->d[0] != '/'); 201 202 // local file does not exist... strange 203 // TODO: message 204 skip_reason.r = rsFSTAT; 205 if (lstat(basename->d, st)==-1) return false; 206 207 // this is a directory and succeeds everytime 208 if (S_ISDIR(st->st_mode)) 209 return true; 210 211 // ignore symlinks 212 skip_reason.r = rsSYMLINK; 213 if (S_ISLNK(st->st_mode)) return false; 214 215 // ignore special files 216 skip_reason.r = rsSPECIAL; 217 if (!S_ISREG(st->st_mode) && 218 !S_ISDIR(st->st_mode)) return false; 219 220 // ignore small files 221 skip_reason.r = rsTOOSMALL; 222 if (st->st_size < HASH_MINSIZE) return false; 223 224 switch (Unify_isIUnlinkable(basename->d)) { 225 case unifyUNSUPPORTED : skip_reason.r = rsUNSUPPORTED; return false; 226 case unifyBUSY : 227 // do an implicit refresh on busy files when there are no active links 228 if (st->st_nlink>1 && !global_args->do_refresh) { 229 // TODO: message 230 skip_reason.r = rsUNIFIED; 231 return false; 232 } 233 break; 234 default : break; 235 } 236 237 return true; 238 } 239 240 static sigjmp_buf bus_error_restore; 241 static volatile sig_atomic_t bus_error; 242 243 static void 244 handlerSIGBUS(int UNUSED num) 245 { 246 bus_error = 1; 247 siglongjmp(bus_error_restore, 1); 248 } 249 250 static bool 251 convertDigest(HashPath d_path) 252 { 253 static char const HEX_DIGIT[] = "0123456789abcdef"; 254 ensc_hash_context * const h_ctx = &global_info.hash_context; 255 size_t d_size = ensc_crypto_hashctx_get_digestsize(h_ctx); 256 257 unsigned char digest[d_size]; 258 size_t out = 0; 259 260 if (ensc_crypto_hashctx_get_digest(h_ctx, digest, NULL, d_size)==-1) 261 return false; 262 263 for (size_t in=0; 264 out+1<sizeof(HashPath)-(sizeof(unsigned int)*2 + 2) && in<d_size; 265 ++in) { 266 if ((in+254)%(in<=2 ? 1 : 256) == 0 && in>0) 267 d_path[out++]='/'; 268 d_path[out++] = HEX_DIGIT[digest[in] >> 4]; 269 d_path[out++] = HEX_DIGIT[digest[in] & 0x0f]; 270 } 271 d_path[out++] = '\0'; 272 273 return true; 274 } 275 276 #ifndef ENSC_TESTSUITE 277 static bool 278 addStatHash(ensc_hash_context *h_ctx, struct stat const * const st) 279 { 280 #define DECL_ATTR(X) __typeof__(st->st_##X) X 281 #define SET_ATTR(X) .X = st->st_##X 282 283 struct __attribute__((__packed__)) { 284 DECL_ATTR(mode); 285 DECL_ATTR(uid); 286 DECL_ATTR(gid); 287 DECL_ATTR(rdev); 288 DECL_ATTR(size); 289 DECL_ATTR(mtime); 290 } tmp = { 291 SET_ATTR(mode), 292 SET_ATTR(uid), 293 SET_ATTR(gid), 294 SET_ATTR(rdev), 295 SET_ATTR(size), 296 .mtime = (global_args->ignore_mtime ? 0 : st->st_mtime), 297 }; 298 299 #undef SET_ATTR 300 #undef DECL_ATTR 301 302 303 return ensc_crypto_hashctx_update(h_ctx, (void *)&tmp, sizeof tmp)!=-1; 304 } 305 #else 306 static bool 307 addStatHash(ensc_hash_context UNUSED *h_ctx, struct stat const UNUSED * const st) 308 { 309 return true; 310 } 311 #endif 312 313 static bool 314 calculateHashFromFD(int fd, HashPath d_path, struct stat const * const st) 315 { 316 ensc_hash_context * const h_ctx = &global_info.hash_context; 317 void const * volatile buf = 0; 318 loff_t volatile buf_size = 0; 319 bool volatile res = false; 320 321 322 if (ensc_crypto_hashctx_reset(h_ctx)==-1 || 323 !addStatHash(h_ctx, st)) 324 return false; 325 326 bus_error = 0; 327 if (sigsetjmp(bus_error_restore,1)==0) { 328 loff_t offset = 0; 329 off_t size = st->st_size; 330 331 while (offset < size) { 332 buf_size = size-offset; 333 if (buf_size>HASH_BLOCKSIZE) buf_size = HASH_BLOCKSIZE; 334 335 if ((buf=mmap(0, buf_size, PROT_READ, MAP_SHARED, fd, offset))==MAP_FAILED) { 336 perror("mmap(<hash>)"); 337 goto out; 338 } 339 340 offset += buf_size; 341 madvise(const_cast(void *)(buf), buf_size, MADV_SEQUENTIAL); // ignore error... 342 343 if (ensc_crypto_hashctx_update(h_ctx, buf, buf_size)==-1) goto out; 344 345 munmap(const_cast(void *)(buf), buf_size); 346 buf = 0; 347 } 348 349 res = convertDigest(d_path); 350 } 351 352 out: 353 if (buf!=0) munmap(const_cast(void *)(buf), buf_size); 354 return res; 355 } 356 357 static bool 358 calculateHash(PathInfo const *filename, HashPath d_path, struct stat const * const st) 359 { 360 int fd = open(filename->d, O_NOFOLLOW|O_NONBLOCK|O_RDONLY|O_NOCTTY); 361 struct stat fst; 362 bool res = false; 363 364 do { 365 if (fd==-1) { 366 int old_errno = errno; 367 WRITE_MSG(2, "Failed to open '"); 368 WRITE_STR(2, filename->d); 369 errno = old_errno; 370 perror("'"); 371 break;; 372 } 373 374 if (fstat(fd, &fst)==-1 || 375 fst.st_dev!=st->st_dev || fst.st_ino!=st->st_ino) { 376 WRITE_MSG(2, "An unexpected event occured while stating '"); 377 WRITE_STR(2, filename->d); 378 WRITE_MSG(2, "'.\n"); 379 break; 380 } 381 382 if (!calculateHashFromFD(fd, d_path, st)) { 383 WRITE_MSG(2, "Failed to calculate hash for '"); 384 WRITE_STR(2, filename->d); 385 WRITE_MSG(2, "'.\n"); 386 break; 387 } 388 389 res = true; 390 } while (false); 391 392 if (fd!=-1) close(fd); 393 return res; 394 } 395 396 static bool 397 resolveCollisions(char *result, PathInfo const *root, HashPath d_path, 398 struct stat *st, struct stat *hash_st) 399 { 400 strcpy(result, root->d); // 'root' ends on '/' already (see initHashList()) 401 strcat(result, d_path); 402 403 char *ptr = result + strlen(result); 404 unsigned int idx = 0; 405 char buf[sizeof(int)*2 + 1]; 406 size_t len; 407 408 *ptr = '-'; 409 ptr[sizeof(int)*2+1] = '\0'; 410 411 for (;; ++idx) { 412 len = utilvserver_fmt_xuint(buf, idx); 413 memset(ptr+1, '0', sizeof(int)*2 - len); 414 memcpy(ptr+1 + sizeof(int)*2 - len, buf, len); 415 416 if (lstat(result, hash_st)==-1) { 417 if (global_args->dry_run && errno!=ENOENT) { 418 int old_errno = errno; 419 WRITE_MSG(2, "lstat('"); 420 WRITE_STR(2, buf); 421 errno = old_errno; 422 perror("')"); 423 return false; 424 } 425 } 426 else if (Unify_isUnified(st, hash_st)) { 427 skip_reason.r = rsUNIFIED; 428 return false; 429 } 430 else if (!Unify_isUnifyable(st, hash_st)) 431 continue; // continue with next number***** 432 else 433 break; // ok, we finish here 434 435 if (!global_args->dry_run) { 436 *ptr = '\0'; 437 if (!mkdirRecursive(result)) { 438 PERROR_Q("mkdir", result); 439 return false; 440 } 441 *ptr = '-'; 442 443 int fd = open(result, O_NOFOLLOW|O_EXCL|O_CREAT|O_WRONLY, 0200); 444 445 if (fd==-1) { 446 PERROR_Q("open", buf); 447 return false; 448 } 449 450 close(fd); 451 } 452 453 // HACK: avoid an additional lstat on the resulting hash-file 454 hash_st->st_size = 0; 455 break; 456 } 457 458 return true; 459 } 460 461 static char const * 462 checkDirEntry(PathInfo const *path, PathInfo const *basename, 463 bool *is_dir, 464 struct stat *st, struct stat *hash_st, 465 char *result_buf) 466 { 467 //printf("checkDirEntry(%s, %s, %u)\n", path->d, d_path, is_dir); 468 469 struct WalkdownInfo const * const info = &global_info; 470 471 // Check if it is in the exclude/include list of the destination vserver and 472 // abort when it is not matching an allowed entry 473 skip_reason.r = rsEXCL; 474 if (MatchList_compare(&info->dst_list, path->d)!=stINCLUDE) return 0; 475 476 if (checkFstat(basename, st)) { 477 PathInfo const *hash_root_path; 478 HashPath d_path; 479 480 *is_dir = S_ISDIR(st->st_mode); 481 482 if (!*is_dir && 483 !((skip_reason.r = rsWRONGDEV, 484 (hash_root_path = HashDirInfo_findDevice(&info->hash_dirs, st->st_dev))!=0) && 485 (skip_reason.r = rsGENERAL, 486 calculateHash(basename, d_path, st)) && 487 resolveCollisions(result_buf, hash_root_path, d_path, st, hash_st))) 488 return 0; 489 490 return result_buf; 491 } 492 493 return 0; 494 } 495 496 static void 497 printSkipReason() 498 { 499 WRITE_MSG(1, " ("); 500 switch (skip_reason.r) { 501 case rsDOTFILE : WRITE_MSG(1, "dotfile"); break; 502 case rsEXCL : WRITE_MSG(1, "excluded"); break; 503 case rsTOOSMALL : WRITE_MSG(1, "too small"); break; 504 case rsUNSUPPORTED : WRITE_MSG(1, "operation not supported"); break; 505 case rsFSTAT : WRITE_MSG(1, "fstat error"); break; 506 case rsSYMLINK : WRITE_MSG(1, "symlink"); break; 507 case rsUNIFIED : WRITE_MSG(1, "already unified"); break; 508 case rsSPECIAL : WRITE_MSG(1, "non regular file"); break; 509 case rsWRONGDEV : WRITE_MSG(1, "no matching device"); break; 510 case rsGENERAL : WRITE_MSG(1, "general error"); break; 511 default : assert(false); abort(); 512 } 513 WRITE_MSG(1, ")"); 514 } 515 516 static bool 517 doit(char const *src, char const *dst, 518 struct stat const *src_st, struct stat const *dst_st, 519 PathInfo const *path) 520 { 521 if (global_args->dry_run || Global_getVerbosity()>=2) { 522 WRITE_MSG(1, "unifying '"); 523 Vwrite(1, path->d, path->l); 524 WRITE_MSG(1, "'"); 525 526 if (Global_getVerbosity()>=4) { 527 WRITE_MSG(1, " (to '"); 528 WRITE_STR(1, dst); 529 WRITE_MSG(1, "')"); 530 } 531 532 WRITE_MSG(1, "\n"); 533 } 534 535 // abort here in dry-run mode 536 if (global_args->dry_run) return true; 537 538 if (dst_st->st_size==0) { 539 // file was not unified yet 540 541 if (Global_isVserverRunning()) { 542 (void)unlink(dst); 543 if (Unify_copy (src, src_st, dst) && 544 // the mixed 'dst' and 'src_st' params are intentionally... 545 Unify_unify(dst, src_st, src, false)) 546 return true; 547 } 548 else if (Unify_unify(src, src_st, dst, true)) 549 return true; 550 551 (void)unlink(dst); // cleanup in error-case 552 } 553 // there exists already a reference-file 554 else if (Unify_unify(dst, dst_st, src, false)) 555 return true; 556 557 return false; 558 } 559 560 static uint64_t 561 visitDirEntry(struct dirent const *ent) 562 { 563 uint64_t res = 0; 564 char const * dirname = ent->d_name; 565 PathInfo path = global_info.state; 566 PathInfo tmp_path = { 567 .d = dirname, 568 .l = strlen(dirname) 569 }; 570 char path_buf[ENSC_PI_APPSZ(path, tmp_path)]; 571 char const *match = 0; 572 573 574 PathInfo_append(&path, &tmp_path, path_buf); 575 576 bool is_dotfile = isDotfile(dirname); 577 bool is_dir; 578 struct stat src_stat = { .st_mode=0 }; 579 struct stat hash_stat; 580 char tmpbuf[global_info.hash_dirs_max_size + 581 sizeof(HashPath) + 2]; 582 583 skip_reason.r = rsDOTFILE; 584 585 if (is_dotfile || 586 (match=checkDirEntry(&path, &tmp_path, 587 &is_dir, &src_stat, &hash_stat, 588 tmpbuf))==0) { 589 590 bool is_link = !is_dotfile && S_ISLNK(src_stat.st_mode); 591 592 if (Global_getVerbosity()>=1 && 593 (Global_getVerbosity()>=3 || skip_reason.r!=rsUNIFIED) && 594 ((!is_dotfile && !is_link) || 595 (Global_getVerbosity()>=6 && is_dotfile) || 596 (Global_getVerbosity()>=6 && is_link)) ) { 597 WRITE_MSG(1, " skipping '"); 598 Vwrite(1, path.d, path.l); 599 WRITE_MSG(1, "'"); 600 if (Global_getVerbosity()>=2) printSkipReason(); 601 WRITE_MSG(1, "\n"); 602 } 603 604 return 0; 605 } 606 607 if (is_dir) { 608 res = visitDir(dirname, &src_stat); 609 } 610 else if (doit(dirname, match, &src_stat, &hash_stat, &path)) 611 res = 1; 612 else { 613 // TODO: message 614 res = 0; 615 } 616 617 return res; 618 619 } 620 621 int main(int argc, char *argv[]) 622 { 623 struct Arguments args = { 624 .mode = mdVSERVER, 625 .hash_dir = 0, 626 .verbosity = 0, 627 .insecure = 0, 628 .dry_run = false, 629 .do_refresh = false, 630 .ignore_mtime = false, 631 }; 632 633 Vector_init(&global_info.hash_dirs, sizeof(struct HashDirInfo)); 634 635 global_args = &args; 636 while (1) { 637 int c = getopt_long(argc, argv, "+nv", 638 CMDLINE_OPTIONS, 0); 639 if (c==-1) break; 640 641 switch (c) { 642 case CMD_HELP : showHelp(argv[0]); 643 case CMD_VERSION : showVersion(); 644 case CMD_DESTINATION : args.hash_dir = optarg; break; 645 case CMD_MANUALLY : args.mode = mdMANUALLY; break; 646 case CMD_INSECURE : args.insecure = 1; break; 647 case CMD_SLEDGE : args.insecure = 2; break; 648 case CMD_REFRESH : args.do_refresh = true; break; 649 case CMD_NOMTIME : args.ignore_mtime = true; break; 650 case 'n' : args.dry_run = true; break; 651 case 'v' : ++args.verbosity; break; 652 default : 653 WRITE_MSG(2, "Try '"); 654 WRITE_STR(2, argv[0]); 655 WRITE_MSG(2, " --help' for more information.\n"); 656 return EXIT_FAILURE; 657 break; 658 } 659 } 660 661 if (argc==optind) { 662 WRITE_MSG(2, "No directory/vserver given\n"); 663 return EXIT_FAILURE; 664 } 665 666 if (args.hash_dir==0 && args.mode==mdMANUALLY) { 667 WRITE_MSG(2, "'--manually' requires '--destination'\n"); 668 return EXIT_FAILURE; 669 } 670 671 ensc_crypto_init(); 672 switch (args.mode) { 673 case mdMANUALLY : initModeManually(&args, argc-optind, argv+optind); break; 674 case mdVSERVER : initModeVserver (&args, argc-optind, argv+optind); break; 675 default : assert(false); return EXIT_FAILURE; 676 }; 677 678 if (ensc_crypto_hashctx_init(&global_info.hash_context, 679 global_info.hash_conf.method)==-1) { 680 WRITE_MSG(2, "Failed to initialize hash-context\n"); 681 return EXIT_FAILURE; 682 } 683 684 if (Global_getVerbosity()>=1) 685 WRITE_MSG(1, "Starting to traverse directories...\n"); 686 687 signal(SIGBUS, handlerSIGBUS); 688 689 Echdir(global_info.dst_list.root.d); 690 visitDir("/", 0); 691 692 #ifndef NDEBUG 693 MatchList_destroy(&global_info.dst_list); 694 freeHashList(&global_info.hash_dirs); 695 ensc_crypto_hashctx_free(&global_info.hash_context); 696 #endif 697 698 return EXIT_SUCCESS; 699 }