#!/usr/bin/env python2
# vim: fileencoding=utf-8 ft=python et sw=4 ts=4 sts=4 tw=79
from __future__ import (
generators, division, absolute_import, with_statement, print_function
)
import argparse
import mmap
import os.path
import stat
import subprocess
import sys

from magic import mime_magic
# External digest programs, keyed by the value accepted by --hash.
# Each value is the argv tuple handed to subprocess.
hash_utils = {
    'md5': ('md5sum',),
    'sha1': ('sha1sum',),
    'sha224': ('sha224sum',),
    'sha256': ('sha256sum',),
    'sha384': ('sha384sum',),
    'sha512': ('sha512sum',),
    'blake2': ('b2sum',),
}

# External binary-to-text encoders, keyed by the value accepted by
# --bin-format.  Each value is the argv tuple handed to subprocess.
bin_utils = {
    'b': ('base64',),
    'x': ('xxd',),
}

# Command line definition.  Options are registered in a fixed order so the
# generated help text stays stable.
parser = argparse.ArgumentParser()
parser.add_argument('--max-bin-size', type=int, default=4096)
parser.add_argument('--max-text-size', type=int, default=-1)
parser.add_argument('--max-line-length', type=int, default=160)

# Boolean switches come in "--name" / "--no-name" pairs sharing one dest.
# The positive form is registered first, so its default is the one in effect.
for _flag, _default in (
    ('compact', False),
    ('content', True),
    ('owner', True),
    ('mode', True),
):
    parser.add_argument('--' + _flag, action='store_true', default=_default)
    parser.add_argument('--no-' + _flag, dest=_flag, action='store_false')
del _flag, _default

parser.add_argument('--hash', default='sha512', choices=tuple(hash_utils))
parser.add_argument('--bin-format', default='b', choices=tuple(bin_utils))
parser.add_argument('root', nargs='*')
# Numeric file type codes, i.e. the value of ``st_mode >> 12`` for each kind
# of filesystem entry.
SOCKET = 0xC
LINK = 0xA
FILE = 0x8
BLOCK = 0x6
DIR = 0x4
CHAR = 0x2
FIFO = 0x1

# Convert a numeric file type code to its mnemonic character.
ftypes = {
    SOCKET: 's',  # socket
    LINK: 'l',    # symbolic link
    FILE: 'f',    # regular file
    BLOCK: 'b',   # block device
    DIR: 'd',     # directory
    CHAR: 'u',    # character device
    FIFO: 'p',    # FIFO
}
class Output(object):
    """Statement-oriented writer on top of a file-like object.

    Statements belonging to one record are separated by ``self.delim``,
    which is the text written *before* the next statement.  It is empty at
    the start of a record.
    """

    def __init__(self, fobj):
        """Wrap ``fobj``; anything with ``write()`` and ``close()`` works."""
        self.fobj = fobj
        self.delim = ''

    def write(self, data):
        """Pass ``data`` straight through to the underlying object."""
        self.fobj.write(data)

    def close(self):
        """Close the underlying object."""
        self.fobj.close()

    def statement(self, text, delim=''):
        """Write one statement, preceded by the pending delimiter.

        Embedded newlines in ``text`` are followed by a tab so continuation
        lines are indented.  ``delim`` becomes the delimiter for the next
        statement; an empty value selects a tab.
        """
        spans_lines = '\n' in text
        if spans_lines and self.delim:
            # A multiline statement starts on a new line (more readable),
            # except when it is the first thing in the record.
            self.delim = '\n'
        indented = text.replace('\n', '\n\t')
        self.write(self.delim)
        self.write(indented)
        self.delim = delim if delim else '\t'

    def statement_end(self):
        """Terminate the current record with a newline."""
        self.write('\n')
        self.delim = ''

    def indent_copy(self, source, strip_last=True):
        """Copy ``source`` line by line, indenting continuation lines.

        ``source`` only needs a ``readline()`` method.  Every line that
        follows a newline-terminated line is prefixed with a tab.  If the
        data does not end with a newline one is added; if it does and
        ``strip_last`` is false, an extra ``'\\t\\n'`` line is written.
        The pending delimiter is cleared afterwards.
        """
        prev_terminated = False
        while True:
            chunk = source.readline()
            if not chunk:
                break
            if prev_terminated:
                self.write('\t')
            self.write(chunk)
            prev_terminated = chunk[-1] == '\n'
        if not prev_terminated:
            self.write('\n')
        elif not strip_last:
            self.write('\t\n')
        self.delim = ''
def hash_file(f, hash, out):
    """Emit an ``s`` statement carrying a digest of ``f``.

    The external program registered in ``hash_utils`` under ``hash`` is run
    with ``f`` as its standard input.  The first whitespace-separated field
    of its output is taken as the digest and written as
    ``s<TAB>HASHNAME:digest``.

    Raises ``subprocess.CalledProcessError`` when the program exits with a
    non-zero status.
    """
    label = hash.upper()
    tool_output = subprocess.check_output(hash_utils[hash], stdin=f)
    digest = tool_output.split(None, 1)[0]
    out.statement('s\t%s:%s' % (label, digest))
def process_content(args, f, size, out):
    """Emit the content statement for an open regular file.

    args -- parsed command line options (``compact``, ``max_text_size``,
            ``max_line_length``, ``max_bin_size``, ``hash``, ``bin_format``)
    f    -- the file, opened for reading in binary mode
    size -- size of the file in bytes as reported by the caller
    out  -- ``Output`` instance receiving the statements

    A file is treated as text when the value returned by
    ``mime_magic.descriptor()`` for its descriptor starts with ``text/``.

    Text files larger than ``max_text_size`` (when that limit is not
    negative) and binary files larger than ``max_bin_size`` are represented
    by a digest only.  Other text is written as a ``C``/``c`` statement
    whose flag is ``n`` when the content ends with a newline and ``N``
    otherwise.  Other binary data is piped through the encoder selected by
    ``bin_format``.

    Raises ``RuntimeError`` when the encoder exits with a non-zero status.
    """
    if not args.compact:
        # In the non-compact layout the content starts on its own line.
        out.delim = '\n'

    if mime_magic.descriptor(f.fileno()).startswith('text/'):
        # text file
        if args.max_text_size >= 0 and size > args.max_text_size:
            hash_file(f, args.hash, out)
        elif size > args.max_line_length:
            # Always use the multiline form for larger files
            mm = mmap.mmap(f.fileno(), 0, mmap.MAP_PRIVATE)
            try:
                flags = 'n' if mm[-1] == '\n' else 'N'
                out.statement('C%s\t' % flags, '')
                out.indent_copy(mm)
            finally:
                mm.close()
        else:
            content = f.read()
            flags = 'N'
            if content and content[-1] == '\n':
                # The flag records the trailing newline, so drop it here.
                content = content[:-1]
                flags = 'n'
            single_line = not ('\n' in content or '\t' in content)
            if args.compact and single_line:
                out.statement('c%s\t%s' % (flags, content))
            else:
                out.statement('C%s\t%s' % (flags, content), '\n')
    else:
        # binary file
        if args.max_bin_size >= 0 and size > args.max_bin_size:
            hash_file(f, args.hash, out)
        else:
            encoder = bin_utils[args.bin_format]
            p = subprocess.Popen(
                encoder,
                bufsize=4096,
                stdin=f,
                stdout=subprocess.PIPE,
            )
            try:
                out.statement('%s\t' % args.bin_format.upper(), '')
                out.indent_copy(p.stdout)
            finally:
                # Close our end of the pipe and reap the child even when
                # copying fails, so neither a descriptor nor a zombie
                # process is left behind (mirrors the mmap cleanup above).
                p.stdout.close()
                returncode = p.wait()
            if returncode:
                raise RuntimeError(
                    "calling %r on %r failed with %r" % (
                        encoder,
                        f.name,
                        p.returncode,
                    )
                )
def process_file(args, relname, rel_to, s, out):
    """Write the complete record for one filesystem entry.

    args    -- parsed command line options (``compact``, ``content``,
               ``owner``, ``mode``)
    relname -- path of the entry relative to ``rel_to``
    rel_to  -- directory that ``relname`` is relative to
    s       -- ``os.lstat()`` result for the entry, supplied by the caller
    out     -- ``Output`` instance receiving the record

    The record consists of the name (``/name``, or ``P<TAB>name`` when the
    name contains a tab or newline), a type-specific statement, then the
    optional ``o<uid>:<gid>`` and ``m<octal mode>`` statements.

    Raises ``KeyError`` for a file type that is not listed in ``ftypes``.
    """
    fname = os.path.join(rel_to, relname)
    ftype = s.st_mode >> 12
    # 0o... is accepted by Python 2.6+ as well as Python 3.
    mode = s.st_mode & 0o7777
    t = ftypes[ftype]

    if not args.compact:
        # Separate records with a blank line in the non-compact layout.
        out.write('\n')

    if '\t' in relname or '\n' in relname:
        out.statement('P\t' + relname, '\t')
    else:
        # The next delimiter is a tab in both the compact and the
        # non-compact layout.
        out.statement('/' + relname, '\t')

    if ftype != DIR and s.st_nlink > 1:
        pass  # TODO: process hardlinks

    if ftype == BLOCK or ftype == CHAR:
        # os.major()/os.minor() decode the platform's device number layout;
        # plain ">> 8" / "& 255" is wrong for minor numbers above 255.
        out.statement('%s%d:%d' % (
            t, os.major(s.st_rdev), os.minor(s.st_rdev),
        ))
    elif ftype == LINK:
        out.statement('l\t' + os.readlink(fname), '\t')
    elif ftype == FILE:
        if args.content:
            with open(fname, 'rb') as f:
                process_content(args, f, s.st_size, out)
        else:
            out.statement('f')
    else:
        out.statement(t)

    if args.owner:
        out.statement('o%d:%d' % (s.st_uid, s.st_gid))
    if args.mode:
        out.statement('m%04o' % mode)
    out.statement_end()
def statwalk(root, top='', sort_func=sorted, exclude=None):
    """Recursively yield ``(relname, lstat_result)`` for entries below root.

    root      -- directory the walk is anchored at
    top       -- subdirectory of ``root`` to list, relative to ``root``
    sort_func -- callable ordering a list of ``(name, lstat_result)``
                 pairs; its result determines the traversal order
    exclude   -- optional predicate ``exclude(root, top, name, s)``; entries
                 for which it returns true are skipped, and excluded
                 directories are not descended into

    ``relname`` is relative to ``root``.  A directory is yielded before its
    contents.  ``sort_func`` and ``exclude`` apply at every level of the
    tree.
    """
    dirpath = os.path.join(root, top)
    names = os.listdir(dirpath)
    stats = [os.lstat(os.path.join(dirpath, name)) for name in names]
    # Materialise the pairs so sort_func receives a list on every Python
    # version (zip() is lazy on Python 3).
    for name, s in sort_func(list(zip(names, stats))):
        if exclude is not None and exclude(root, top, name, s):
            continue
        relname = os.path.join(top, name)
        yield (relname, s)
        if stat.S_ISDIR(s.st_mode):
            # We don't have readdir() and directory filedescriptors
            # available so we will keep just appending to the path.
            # Hand the ordering and the filter down as well; otherwise
            # they would only take effect in the top-level directory.
            for x in statwalk(root, relname, sort_func, exclude):
                yield x
def process_root(args, root, out):
    """Write a record for every entry found by walking ``root``.

    Entries are taken from ``statwalk(root)`` in the order it yields them
    and handed to ``process_file`` with ``root`` as the base directory.
    """
    for entry in statwalk(root):
        relname, s = entry
        process_file(args, relname, root, s, out)
def main(args):
    """Describe every requested root on standard output.

    ``args`` is the namespace produced by the module's argument parser.
    When no root was given on the command line the current directory is
    processed.
    """
    out = Output(sys.stdout)
    for root in (args.root or ['.']):
        process_root(args, root, out)
if __name__ == '__main__':
    # Script entry point: parse the command line and run.
    main(parser.parse_args())