#!/usr/bin/env python2 # vim: fileencoding=utf-8 ft=python et sw=4 ts=4 sts=4 tw=79 from __future__ import ( generators, division, absolute_import, with_statement, print_function ) import sys import os.path import argparse import subprocess import mmap from magic import mime_magic parser = argparse.ArgumentParser() parser.add_argument('--max-bin-size', type=int, default=4096) parser.add_argument('--max-text-size', type=int, default=-1) parser.add_argument('--max-line-length', type=int, default=160) parser.add_argument('--compact', action='store_true', default=False) parser.add_argument('--no-compact', dest='compact', action='store_false') parser.add_argument('--content', action='store_true', default=True) parser.add_argument('--no-content', dest='content', action='store_false') parser.add_argument('--owner', action='store_true', default=True) parser.add_argument('--no-owner', dest='owner', action='store_false') parser.add_argument('--mode', action='store_true', default=True) parser.add_argument('--no-mode', dest='mode', action='store_false') hash_utils = { 'md5': ('md5sum',), 'sha1': ('sha1sum',), 'sha224': ('sha224sum',), 'sha256': ('sha256sum',), 'sha384': ('sha384sum',), 'sha512': ('sha512sum',), 'blake2': ('b2sum',), } parser.add_argument('--hash', default='sha512', choices=tuple(hash_utils.keys())) bin_utils = { 'b': ('base64',), 'x': ('xxd',), } parser.add_argument('--bin-format', default='b', choices=tuple(bin_utils.keys())) parser.add_argument('root', nargs='*') ftypes = { # convert numeric type to mnemonic character 0xC: 's', # socket 0xA: 'l', # symbolic link 0x8: 'f', # regular file 0x6: 'b', # block device 0x4: 'd', # directory 0x2: 'u', # character device 0x1: 'p', # FIFO } SOCKET = 0xC LINK = 0xA FILE = 0x8 BLOCK = 0x6 DIR = 0x4 CHAR = 0x2 FIFO = 0x1 class Output(object): def __init__(self, fobj): self.fobj = fobj self.delim = '' def write(self, data): self.fobj.write(data) def close(self): self.fobj.close() def statement(self, text, delim=''): # start on new line for multiline statements - more readable if self.delim and '\n' in text: self.delim = '\n' self.write(self.delim) self.write(text.replace('\n', '\n\t')) self.delim = delim or '\t' def statement_end(self): self.write('\n') self.delim = '' def indent_copy(self, source, strip_last=True): line = source.readline() had_endl = False while line: if had_endl: self.write('\t') had_endl = line[-1] == '\n' self.write(line) line = source.readline() if not had_endl: self.write('\n') elif not strip_last: self.write('\t\n') self.delim = '' def hash_file(f, hash, out): out.statement('s\t%s:%s' % ( hash.upper(), subprocess.check_output(hash_utils[hash], stdin=f).split(None, 1)[0], )) def process_content(args, f, size, out): if not args.compact: out.delim = '\n' if mime_magic.descriptor(f.fileno()).startswith('text/'): # text file if args.max_text_size >= 0 and size > args.max_text_size: hash_file(f, args.hash, out) elif size > args.max_line_length: # Always use the multiline form for larger files mm = mmap.mmap(f.fileno(), 0, mmap.MAP_PRIVATE) try: flags = ('n' if mm[-1] == '\n' else 'N') out.statement('C%s\t' % flags, '') out.indent_copy(mm) finally: mm.close() else: content = f.read() flags = 'N' if content and content[-1] == '\n': content = content[:-1] flags = 'n' if args.compact and not ( '\n' in content or '\t' in content ): out.statement('c%s\t%s' % (flags, content)) else: out.statement('C%s\t%s' % (flags, content), '\n') else: # binary file if args.max_bin_size >= 0 and size > args.max_bin_size: hash_file(f, args.hash, out) else: p = subprocess.Popen( bin_utils[args.bin_format], 4096, stdin=f, stdout=subprocess.PIPE, ) out.statement('%s\t' % args.bin_format.upper(), '') out.indent_copy(p.stdout) if p.wait(): raise RuntimeError( "calling %r on %r failed with %r" % ( bin_utils[args.bin_format], f.name, p.returncode, ) ) def process_file(args, relname, rel_to, s, out): fname = os.path.join(rel_to, relname) # s = os.lstat(fname) ftype = s.st_mode >> 12 mode = s.st_mode & 07777 t = ftypes[ftype] if not args.compact: out.write('\n') if '\t' in relname or '\n' in relname: out.statement('P\t' + relname, '\t') else: if args.compact: out.statement('/' + relname) else: out.statement('/' + relname, '\t') if ftype != DIR and s.st_nlink > 1: pass # TODO: process hardlinks if ftype == BLOCK or ftype == CHAR: out.statement('%s%d:%d' % (t, s.st_rdev >> 8, s.st_rdev & 255)) elif ftype == LINK: out.statement('l\t' + os.readlink(fname), '\t') elif ftype == FILE: if args.content: with open(fname, 'rb') as f: process_content(args, f, s.st_size, out) else: out.statement('f') else: out.statement(t) if args.owner: out.statement('o%d:%d' % (s.st_uid, s.st_gid)) if args.mode: out.statement('m%04o' % mode,) out.statement_end() def statwalk(root, top='', sort_func=sorted, exclude=None): names = os.listdir(os.path.join(root, top)) stats = [os.lstat(os.path.join(root, top, name)) for name in names] for (name, s) in sort_func(zip(names, stats)): relname = os.path.join(top, name) if exclude is not None and exclude(root, top, name, s): continue yield (relname, s) if DIR == s.st_mode >> 12: # We don't have readdir() and directory filedescriptors available # so we will keep just appending to the path. for x in statwalk(root, relname): yield x def process_root(args, root, out): for relname, s in statwalk(root): process_file(args, relname, root, s, out) def main(args): out = Output(sys.stdout) if not args.root: process_root(args, '.', out) else: for root in args.root: process_root(args, root, out) if __name__ == '__main__': args = parser.parse_args() main(args)