commit fda4d61014bf57f50c4f6f6f542cf24b9a19ae59
parent 870d74697f374b90cf5ba3643cdb61c6cd572b3e
Author: Jan Pobrislo <ccx@webprojekty.cz>
Date: Wed, 21 Nov 2018 16:48:47 +0100
New, faster fslist.py.
Diffstat:
| A | bin/fslist.py | | | 244 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 244 insertions(+), 0 deletions(-)
diff --git a/bin/fslist.py b/bin/fslist.py
@@ -0,0 +1,244 @@
+#!/usr/bin/env python2
+# vim: fileencoding=utf-8 ft=python et sw=4 ts=4 sts=4 tw=79
+
+from __future__ import (
+ generators, division, absolute_import, with_statement, print_function
+)
+import sys
+import os.path
+import argparse
+import subprocess
+import mmap
+
+from magic import mime_magic
+
+
+# External helper commands, keyed by the value accepted on the command line.
+# Each entry is an argv tuple; the file to process is handed to the command
+# on its standard input.
+hash_utils = {
+    'md5': ('md5sum',),
+    'sha1': ('sha1sum',),
+    'sha224': ('sha224sum',),
+    'sha256': ('sha256sum',),
+    'sha384': ('sha384sum',),
+    'sha512': ('sha512sum',),
+    'blake2': ('b2sum',),
+}
+bin_utils = {
+    'b': ('base64',),
+    'x': ('xxd',),
+}
+
+parser = argparse.ArgumentParser()
+
+# Size thresholds; for the bin/text limits a negative value disables the
+# limit (see process_content).
+parser.add_argument('--max-bin-size', default=4096, type=int)
+parser.add_argument('--max-text-size', default=-1, type=int)
+parser.add_argument('--max-line-length', default=160, type=int)
+
+# Boolean switches, each with a matching --no-<name> counterpart.
+for _flag, _enabled in (
+    ('compact', False),
+    ('content', True),
+    ('owner', True),
+    ('mode', True),
+):
+    parser.add_argument('--' + _flag, action='store_true', default=_enabled)
+    parser.add_argument('--no-' + _flag, dest=_flag, action='store_false')
+
+parser.add_argument('--hash', choices=tuple(hash_utils), default='sha512')
+parser.add_argument('--bin-format', choices=tuple(bin_utils), default='b')
+
+# Directories to list; main() falls back to '.' when none are given.
+parser.add_argument('root', nargs='*')
+
+
+# File type codes, i.e. the value of st_mode >> 12.
+SOCKET = 0xC
+LINK = 0xA
+FILE = 0x8
+BLOCK = 0x6
+DIR = 0x4
+CHAR = 0x2
+FIFO = 0x1
+
+# Mnemonic character written to the listing for each file type code.
+ftypes = {
+    SOCKET: 's',  # socket
+    LINK: 'l',  # symbolic link
+    FILE: 'f',  # regular file
+    BLOCK: 'b',  # block device
+    DIR: 'd',  # directory
+    CHAR: 'u',  # character device
+    FIFO: 'p',  # FIFO
+}
+
+
+class Output(object):
+    """Writer for delimiter-separated statement records on a file object.
+
+    Statements belonging to one record are joined by the pending delimiter
+    kept in ``delim``; continuation lines of multi-line text get a leading
+    tab so they can be told apart from the start of a new record.
+    """
+
+    def __init__(self, fobj):
+        # Destination object; write() and close() are forwarded to it.
+        self.fobj = fobj
+        # Delimiter emitted before the next statement ('' at record start).
+        self.delim = ''
+
+    def write(self, data):
+        """Forward ``data`` unchanged to the underlying file object."""
+        self.fobj.write(data)
+
+    def close(self):
+        """Close the underlying file object."""
+        self.fobj.close()
+
+    def statement(self, text, delim=''):
+        """Emit one statement, preceded by the pending delimiter.
+
+        ``delim`` becomes the delimiter used before the following statement;
+        an empty value selects a tab.
+        """
+        multiline = '\n' in text
+        if multiline and self.delim:
+            # Multi-line statements read better when they start a new line.
+            self.delim = '\n'
+        self.write(self.delim)
+        self.write('\n\t'.join(text.split('\n')))
+        self.delim = delim if delim else '\t'
+
+    def statement_end(self):
+        """Terminate the current record and clear the pending delimiter."""
+        self.write('\n')
+        self.delim = ''
+
+    def indent_copy(self, source, strip_last=True):
+        """Copy ``source`` line by line, tab-indenting all but the first line.
+
+        ``source`` only needs a readline() method.  The output always ends
+        with a newline; when the source itself ended with one and
+        ``strip_last`` is false, an extra line holding a single tab is added.
+        The pending delimiter is cleared afterwards.
+        """
+        prev_complete = False
+        while True:
+            chunk = source.readline()
+            if not chunk:
+                break
+            if prev_complete:
+                self.write('\t')
+            prev_complete = chunk[-1] == '\n'
+            self.write(chunk)
+        if not prev_complete:
+            self.write('\n')
+        elif not strip_last:
+            self.write('\t\n')
+        self.delim = ''
+
+
+def hash_file(f, hash, out):
+    """Emit an ``s`` statement carrying the digest of ``f``.
+
+    f    -- open file handed to the digest command as its standard input
+    hash -- key into ``hash_utils`` selecting the command to run
+    out  -- Output instance receiving the statement
+
+    The digest is the first whitespace-separated field of the command's
+    output; a failing command raises subprocess.CalledProcessError.
+    """
+    label = hash.upper()
+    command_output = subprocess.check_output(hash_utils[hash], stdin=f)
+    digest = command_output.split(None, 1)[0]
+    out.statement('s\t%s:%s' % (label, digest))
+
+
+def process_content(args, f, size, out):
+    """Write the content statement for the regular file ``f`` to ``out``.
+
+    Files whose MIME type starts with ``text/`` are copied verbatim; all
+    other files are piped through the ``--bin-format`` encoder.  A file
+    larger than the applicable ``--max-*-size`` limit is represented by its
+    digest instead (a negative limit disables the check).
+
+    args -- parsed command line options
+    f    -- file object open for reading; fileno() must be usable
+    size -- file size in bytes as determined by the caller
+    out  -- Output instance receiving the statement
+
+    Raises RuntimeError when the encoder command exits with non-zero status.
+    """
+    if not args.compact:
+        out.delim = '\n'
+    # NOTE(review): the code below assumes mime_magic.descriptor() leaves the
+    # descriptor's read offset at the start of the file - confirm.
+    if mime_magic.descriptor(f.fileno()).startswith('text/'):
+        # text file
+        if args.max_text_size >= 0 and size > args.max_text_size:
+            hash_file(f, args.hash, out)
+        elif size > 0 and size > args.max_line_length:
+            # Always use the multiline form for larger files.  Empty files
+            # are kept out of this branch because they cannot be mapped.
+            mm = mmap.mmap(f.fileno(), 0, mmap.MAP_PRIVATE)
+            try:
+                flags = ('n' if mm[-1] == '\n' else 'N')
+                out.statement('C%s\t' % flags, '')
+                out.indent_copy(mm)
+            finally:
+                # Release the mapping even if writing fails.
+                mm.close()
+        else:
+            content = f.read()
+            flags = 'N'
+            if content and content[-1] == '\n':
+                content = content[:-1]
+                flags = 'n'
+            if args.compact and not (
+                '\n' in content or '\t' in content
+            ):
+                out.statement('c%s\t%s' % (flags, content))
+            else:
+                out.statement('C%s\t%s' % (flags, content), '\n')
+    else:
+        # binary file
+        if args.max_bin_size >= 0 and size > args.max_bin_size:
+            hash_file(f, args.hash, out)
+        else:
+            p = subprocess.Popen(
+                bin_utils[args.bin_format],
+                4096,
+                stdin=f,
+                stdout=subprocess.PIPE,
+            )
+            try:
+                out.statement('%s\t' % args.bin_format.upper(), '')
+                out.indent_copy(p.stdout)
+            finally:
+                # Close our end of the pipe and reap the child on every
+                # path so neither a descriptor nor a zombie is left behind.
+                p.stdout.close()
+                status = p.wait()
+            if status:
+                raise RuntimeError(
+                    "calling %r on %r failed with %r" % (
+                        bin_utils[args.bin_format],
+                        f.name,
+                        p.returncode,
+                    )
+                )
+
+
+def process_file(args, relname, rel_to, s, out):
+    """Write the complete record describing one filesystem entry.
+
+    The record consists of the entry name, a type-specific statement
+    (device numbers, link target, file content or just the type mnemonic)
+    and, when enabled, owner (``o<uid>:<gid>``) and mode (``m<octal>``).
+
+    args    -- parsed command line options
+    relname -- path of the entry relative to ``rel_to``
+    rel_to  -- directory that ``relname`` is relative to
+    s       -- lstat() result for the entry
+    out     -- Output instance receiving the record
+
+    Raises KeyError for a file type code missing from ``ftypes``.
+    """
+    fname = os.path.join(rel_to, relname)
+    ftype = s.st_mode >> 12
+    # 0o7777 is accepted by Python 2.6+ and 3, unlike the legacy 07777 form.
+    mode = s.st_mode & 0o7777
+    t = ftypes[ftype]
+
+    if not args.compact:
+        out.write('\n')
+
+    if '\t' in relname or '\n' in relname:
+        # Names containing the delimiters need the explicit path statement.
+        out.statement('P\t' + relname, '\t')
+    elif args.compact:
+        out.statement('/' + relname)
+    else:
+        out.statement('/' + relname, '\t')
+
+    if ftype != DIR and s.st_nlink > 1:
+        pass  # TODO: process hardlinks
+
+    if ftype == BLOCK or ftype == CHAR:
+        # os.major()/os.minor() decode the platform's dev_t layout; a plain
+        # 8-bit split is wrong when the number uses the extended encoding.
+        out.statement('%s%d:%d' % (
+            t, os.major(s.st_rdev), os.minor(s.st_rdev),
+        ))
+    elif ftype == LINK:
+        out.statement('l\t' + os.readlink(fname), '\t')
+    elif ftype == FILE:
+        if args.content:
+            with open(fname, 'rb') as f:
+                process_content(args, f, s.st_size, out)
+        else:
+            out.statement('f')
+    else:
+        out.statement(t)
+
+    if args.owner:
+        out.statement('o%d:%d' % (s.st_uid, s.st_gid))
+
+    if args.mode:
+        out.statement('m%04o' % mode)
+
+    out.statement_end()
+
+
+def statwalk(root, top='', sort_func=sorted, exclude=None):
+    """Walk the tree below ``root``/``top``, yielding ``(relname, stat)``.
+
+    Entries are produced depth first, a directory before its content;
+    ``relname`` is relative to ``root`` and ``stat`` is the lstat() result,
+    so symbolic links are reported rather than followed.
+
+    root      -- directory the walk is anchored at
+    top       -- sub-directory of ``root`` to list, '' for ``root`` itself
+    sort_func -- orders the ``(name, stat)`` pairs of each directory
+    exclude   -- optional ``exclude(root, top, name, stat)`` predicate; a
+                 true result skips the entry and, for a directory,
+                 everything below it
+    """
+    names = os.listdir(os.path.join(root, top))
+    stats = [os.lstat(os.path.join(root, top, name)) for name in names]
+
+    for (name, s) in sort_func(zip(names, stats)):
+        relname = os.path.join(top, name)
+        if exclude is not None and exclude(root, top, name, s):
+            continue
+        yield (relname, s)
+        if DIR == s.st_mode >> 12:
+            # We don't have readdir() and directory filedescriptors available
+            # so we will keep just appending to the path.  The ordering and
+            # exclusion callbacks are handed down so they apply at every
+            # level, not only to the top directory.
+            for x in statwalk(root, relname, sort_func, exclude):
+                yield x
+
+
+def process_root(args, root, out):
+    """Write one record to ``out`` for every entry found below ``root``."""
+    for entry in statwalk(root):
+        relname, s = entry
+        process_file(args, relname, root, s, out)
+
+
+def main(args):
+    """List every requested root on standard output.
+
+    ``args`` is the namespace produced by ``parser``; with no roots given
+    the current directory is listed.
+    """
+    out = Output(sys.stdout)
+
+    roots = args.root if args.root else ['.']
+    for root in roots:
+        process_root(args, root, out)
+
+
+# Script entry point: parse the command line and produce the listing.
+if __name__ == '__main__':
+    main(parser.parse_args())