fslist.py (7066B)
#!/usr/bin/env python2
# vim: fileencoding=utf-8 ft=python et sw=4 ts=4 sts=4 tw=79

from __future__ import (
    generators, division, absolute_import, with_statement, print_function
)
import sys
import os.path
import argparse
import subprocess
import mmap

from magic import mime_magic


# Command lines (argv tuples) of the external digest tools, keyed by the
# value accepted for --hash.
hash_utils = {
    'md5': ('md5sum',),
    'sha1': ('sha1sum',),
    'sha224': ('sha224sum',),
    'sha256': ('sha256sum',),
    'sha384': ('sha384sum',),
    'sha512': ('sha512sum',),
    'blake2': ('b2sum',),
}

# Command lines (argv tuples) of the external binary-to-text encoders, keyed
# by the value accepted for --bin-format.
bin_utils = {
    'b': ('base64',),
    'x': ('xxd',),
}


def _add_switch(target, name, default):
    """Register the ``--<name>`` / ``--no-<name>`` pair of boolean flags.

    Both options store into the same destination; *default* is the value
    used when neither of them is given on the command line.
    """
    target.add_argument('--' + name, action='store_true', default=default)
    target.add_argument('--no-' + name, dest=name, action='store_false')


parser = argparse.ArgumentParser()
parser.add_argument('--max-bin-size', type=int, default=4096)
parser.add_argument('--max-text-size', type=int, default=-1)
parser.add_argument('--max-line-length', type=int, default=160)

_add_switch(parser, 'compact', False)
_add_switch(parser, 'content', True)
_add_switch(parser, 'owner', True)
_add_switch(parser, 'mode', True)

parser.add_argument('--hash', default='sha512',
                    choices=tuple(hash_utils))
parser.add_argument('--bin-format', default='b',
                    choices=tuple(bin_utils))

parser.add_argument('root', nargs='*')


# Numeric file types, i.e. the value of ``st_mode >> 12``.
SOCKET = 0xC
LINK = 0xA
FILE = 0x8
BLOCK = 0x6
DIR = 0x4
CHAR = 0x2
FIFO = 0x1

# Numeric file type -> mnemonic character used in the listing.
ftypes = {
    SOCKET: 's',  # socket
    LINK: 'l',    # symbolic link
    FILE: 'f',    # regular file
    BLOCK: 'b',   # block device
    DIR: 'd',     # directory
    CHAR: 'u',    # character device
    FIFO: 'p',    # FIFO
}


class Output(object):
    """Writer for the tab-separated listing, one record per line.

    Wraps a file-like object and remembers which delimiter has to be written
    before the next statement of the record currently being emitted.
    """

    def __init__(self, fobj):
        self.fobj = fobj
        # Delimiter to emit before the next statement; empty at the start
        # of a record.
        self.delim = ''

    def write(self, data):
        """Write *data* to the wrapped file object unchanged."""
        self.fobj.write(data)

    def close(self):
        """Close the wrapped file object."""
        self.fobj.close()

    def statement(self, text, delim=''):
        """Emit one statement, preceded by the pending delimiter.

        Embedded newlines in *text* are followed by a tab so continuation
        lines stay indented.  *delim* becomes the delimiter for the next
        statement; an empty value means a tab.
        """
        # start on new line for multiline statements - more readable
        if self.delim and '\n' in text:
            self.delim = '\n'
        self.write(self.delim)
        self.write(text.replace('\n', '\n\t'))
        self.delim = delim or '\t'

    def statement_end(self):
        """Terminate the current record and reset the pending delimiter."""
        self.write('\n')
        self.delim = ''

    def indent_copy(self, source, strip_last=True):
        """Copy *source* line by line, indenting every line but the first.

        *source* only needs a ``readline()`` method.  The output always ends
        with a newline; when the input already ended with one and
        *strip_last* is false, an extra line holding just a tab is appended.
        """
        had_endl = False
        line = source.readline()
        while line:
            if had_endl:
                self.write('\t')
            had_endl = line[-1] == '\n'
            self.write(line)
            line = source.readline()
        if not had_endl:
            self.write('\n')
        elif not strip_last:
            self.write('\t\n')
        self.delim = ''


def hash_file(f, hash, out):
    """Emit an ``s`` statement with the digest of the open file *f*.

    The external tool registered in ``hash_utils[hash]`` is run with *f* as
    its standard input; the first whitespace-separated field of its output
    is taken as the digest.

    Raises subprocess.CalledProcessError if the tool exits with a non-zero
    status.
    """
    digest = subprocess.check_output(hash_utils[hash], stdin=f)
    out.statement('s\t%s:%s' % (hash.upper(), digest.split(None, 1)[0]))


def _dump_binary(args, f, out):
    """Pipe *f* through the ``--bin-format`` encoder and emit its output."""
    command = bin_utils[args.bin_format]
    proc = subprocess.Popen(command, 4096, stdin=f, stdout=subprocess.PIPE)
    try:
        out.statement('%s\t' % args.bin_format.upper(), '')
        out.indent_copy(proc.stdout)
    finally:
        # Always release the pipe and reap the child, even when copying
        # failed half-way through.
        proc.stdout.close()
        status = proc.wait()
    if status:
        raise RuntimeError(
            "calling %r on %r failed with %r" % (command, f.name, status)
        )


def process_content(args, f, size, out):
    """Emit the content (or its digest) of the regular file *f*.

    args -- parsed command line options
    f    -- the file, opened for reading in binary mode
    size -- file size in bytes as reported by the caller
    out  -- Output instance to write to

    Files whose MIME type starts with ``text/`` are emitted verbatim as a
    ``c``/``C`` statement, anything else goes through the ``--bin-format``
    encoder.  Files above the respective size limit are replaced by their
    digest (a negative limit disables the check).

    Raises RuntimeError if the binary encoder exits with a non-zero status.
    """
    if not args.compact:
        out.delim = '\n'

    if not mime_magic.descriptor(f.fileno()).startswith('text/'):
        # binary file
        if 0 <= args.max_bin_size < size:
            hash_file(f, args.hash, out)
        else:
            _dump_binary(args, f, out)
        return

    # text file
    if 0 <= args.max_text_size < size:
        hash_file(f, args.hash, out)
    elif size > 0 and size > args.max_line_length:
        # Always use the multiline form for larger files.  Empty files are
        # kept out of this branch because they cannot be mapped.
        mm = mmap.mmap(f.fileno(), 0, mmap.MAP_PRIVATE)
        try:
            flags = ('n' if mm[-1] == '\n' else 'N')
            out.statement('C%s\t' % flags, '')
            out.indent_copy(mm)
        finally:
            mm.close()
    else:
        content = f.read()
        # 'n' marks a trailing newline that was stripped from the content,
        # 'N' marks content that did not end with one.
        flags = 'N'
        if content and content[-1] == '\n':
            content = content[:-1]
            flags = 'n'
        single_line = '\n' not in content and '\t' not in content
        if args.compact and single_line:
            out.statement('c%s\t%s' % (flags, content))
        else:
            out.statement('C%s\t%s' % (flags, content), '\n')


def process_file(args, relname, rel_to, s, out):
    """Emit the complete record for one directory entry.

    args    -- parsed command line options
    relname -- path of the entry relative to *rel_to*
    rel_to  -- directory the walk started from
    s       -- result of ``os.lstat()`` for the entry
    out     -- Output instance to write to

    Raises KeyError for a file type that is not listed in ``ftypes``.
    """
    fname = os.path.join(rel_to, relname)
    ftype = s.st_mode >> 12
    mode = s.st_mode & 0o7777
    t = ftypes[ftype]

    if not args.compact:
        out.write('\n')

    if '\t' in relname or '\n' in relname:
        # Names containing the delimiters get the explicit 'P' form.
        out.statement('P\t' + relname, '\t')
    else:
        out.statement('/' + relname)

    if ftype != DIR and s.st_nlink > 1:
        pass  # TODO: process hardlinks

    if ftype == BLOCK or ftype == CHAR:
        # Let the OS decode the device number instead of assuming the
        # historical 8 bit major / 8 bit minor layout.
        out.statement(
            '%s%d:%d' % (t, os.major(s.st_rdev), os.minor(s.st_rdev))
        )
    elif ftype == LINK:
        out.statement('l\t' + os.readlink(fname), '\t')
    elif ftype == FILE:
        if args.content:
            with open(fname, 'rb') as f:
                process_content(args, f, s.st_size, out)
        else:
            out.statement('f')
    else:
        out.statement(t)

    if args.owner:
        out.statement('o%d:%d' % (s.st_uid, s.st_gid))

    if args.mode:
        out.statement('m%04o' % mode)

    out.statement_end()


def statwalk(root, top='', sort_func=sorted, exclude=None):
    """Recursively yield ``(relname, lstat_result)`` for everything in *root*.

    root      -- directory the walk is relative to
    top       -- subdirectory of *root* to list, relative to *root*
    sort_func -- called with the list of ``(name, stat)`` pairs of one
                 directory, returns them in the order they should be visited
    exclude   -- optional ``exclude(root, top, name, stat)`` predicate; a
                 true result skips the entry and, for a directory, its
                 whole subtree

    A directory is yielded before its content.  Symbolic links are not
    followed because entries are examined with ``os.lstat()``.
    """
    base = os.path.join(root, top)
    names = os.listdir(base)
    stats = [os.lstat(os.path.join(base, name)) for name in names]

    # list() so that sort_func receives a list on Python 3 as well.
    for name, s in sort_func(list(zip(names, stats))):
        if exclude is not None and exclude(root, top, name, s):
            continue
        relname = os.path.join(top, name)
        yield (relname, s)
        if DIR == s.st_mode >> 12:
            # We don't have readdir() and directory filedescriptors available
            # so we will keep just appending to the path.
            # Ordering and exclusion have to apply to the whole tree, so
            # they are handed down to the nested walk.
            for entry in statwalk(root, relname, sort_func, exclude):
                yield entry


def process_root(args, root, out):
    """Emit a record for every entry found below *root*."""
    for relname, s in statwalk(root):
        process_file(args, relname, root, s, out)


def main(args):
    """List every root named on the command line, or '.' if none is given."""
    out = Output(sys.stdout)

    for root in (args.root or ['.']):
        process_root(args, root, out)


if __name__ == '__main__':
    args = parser.parse_args()
    main(args)