fileset

git mirror of https://ccx.te2000.cz/bzr/fileset
git clone https://ccx.te2000.cz/git/fileset
Log | Files | Refs | README

fslist.py (7066B)


      1 #!/usr/bin/env python2
      2 # vim: fileencoding=utf-8 ft=python et sw=4 ts=4 sts=4 tw=79
      3 
      4 from __future__ import (
      5     generators, division, absolute_import, with_statement, print_function
      6 )
      7 import sys
      8 import os.path
      9 import argparse
     10 import subprocess
     11 import mmap
     12 
     13 from magic import mime_magic
     14 
     15 
     16 parser = argparse.ArgumentParser()
     17 parser.add_argument('--max-bin-size', type=int, default=4096)
     18 parser.add_argument('--max-text-size', type=int, default=-1)
     19 parser.add_argument('--max-line-length', type=int, default=160)
     20 
     21 parser.add_argument('--compact', action='store_true', default=False)
     22 parser.add_argument('--no-compact', dest='compact', action='store_false')
     23 
     24 parser.add_argument('--content', action='store_true', default=True)
     25 parser.add_argument('--no-content', dest='content', action='store_false')
     26 
     27 parser.add_argument('--owner', action='store_true', default=True)
     28 parser.add_argument('--no-owner', dest='owner', action='store_false')
     29 
     30 parser.add_argument('--mode', action='store_true', default=True)
     31 parser.add_argument('--no-mode', dest='mode', action='store_false')
     32 
     33 hash_utils = {
     34     'md5': ('md5sum',),
     35     'sha1': ('sha1sum',),
     36     'sha224': ('sha224sum',),
     37     'sha256': ('sha256sum',),
     38     'sha384': ('sha384sum',),
     39     'sha512': ('sha512sum',),
     40     'blake2': ('b2sum',),
     41 }
     42 parser.add_argument('--hash', default='sha512',
     43                     choices=tuple(hash_utils.keys()))
     44 
     45 bin_utils = {
     46     'b': ('base64',),
     47     'x': ('xxd',),
     48 }
     49 parser.add_argument('--bin-format', default='b',
     50                     choices=tuple(bin_utils.keys()))
     51 
     52 parser.add_argument('root', nargs='*')
     53 
     54 
     55 ftypes = {  # convert numeric type to mnemonic character
     56     0xC: 's',  # socket
     57     0xA: 'l',  # symbolic link
     58     0x8: 'f',  # regular file
     59     0x6: 'b',  # block device
     60     0x4: 'd',  # directory
     61     0x2: 'u',  # character device
     62     0x1: 'p',  # FIFO
     63 }
     64 
     65 SOCKET = 0xC
     66 LINK = 0xA
     67 FILE = 0x8
     68 BLOCK = 0x6
     69 DIR = 0x4
     70 CHAR = 0x2
     71 FIFO = 0x1
     72 
     73 
     74 class Output(object):
     75     def __init__(self, fobj):
     76         self.fobj = fobj
     77         self.delim = ''
     78 
     79     def write(self, data):
     80         self.fobj.write(data)
     81 
     82     def close(self):
     83         self.fobj.close()
     84 
     85     def statement(self, text, delim=''):
     86         # start on new line for multiline statements - more readable
     87         if self.delim and '\n' in text:
     88             self.delim = '\n'
     89         self.write(self.delim)
     90         self.write(text.replace('\n', '\n\t'))
     91         self.delim = delim or '\t'
     92 
     93     def statement_end(self):
     94         self.write('\n')
     95         self.delim = ''
     96 
     97     def indent_copy(self, source, strip_last=True):
     98         line = source.readline()
     99         had_endl = False
    100         while line:
    101             if had_endl:
    102                 self.write('\t')
    103             had_endl = line[-1] == '\n'
    104             self.write(line)
    105             line = source.readline()
    106         if not had_endl:
    107             self.write('\n')
    108         elif not strip_last:
    109             self.write('\t\n')
    110         self.delim = ''
    111 
    112 
    113 def hash_file(f, hash, out):
    114     out.statement('s\t%s:%s' % (
    115         hash.upper(),
    116         subprocess.check_output(hash_utils[hash], stdin=f).split(None, 1)[0],
    117     ))
    118 
    119 
    120 def process_content(args, f, size, out):
    121     if not args.compact:
    122         out.delim = '\n'
    123     if mime_magic.descriptor(f.fileno()).startswith('text/'):
    124         # text file
    125         if args.max_text_size >= 0 and size > args.max_text_size:
    126             hash_file(f, args.hash, out)
    127         elif size > args.max_line_length:
    128             # Always use the multiline form for larger files
    129             mm = mmap.mmap(f.fileno(), 0, mmap.MAP_PRIVATE)
    130             try:
    131                 flags = ('n' if mm[-1] == '\n' else 'N')
    132                 out.statement('C%s\t' % flags, '')
    133                 out.indent_copy(mm)
    134             finally:
    135                 mm.close()
    136         else:
    137             content = f.read()
    138             flags = 'N'
    139             if content and content[-1] == '\n':
    140                 content = content[:-1]
    141                 flags = 'n'
    142             if args.compact and not (
    143                     '\n' in content or '\t' in content
    144             ):
    145                 out.statement('c%s\t%s' % (flags, content))
    146             else:
    147                 out.statement('C%s\t%s' % (flags, content), '\n')
    148     else:
    149         # binary file
    150         if args.max_bin_size >= 0 and size > args.max_bin_size:
    151             hash_file(f, args.hash, out)
    152         else:
    153             p = subprocess.Popen(
    154                 bin_utils[args.bin_format],
    155                 4096,
    156                 stdin=f,
    157                 stdout=subprocess.PIPE,
    158             )
    159             out.statement('%s\t' % args.bin_format.upper(), '')
    160             out.indent_copy(p.stdout)
    161             if p.wait():
    162                 raise RuntimeError(
    163                     "calling %r on %r failed with %r" % (
    164                         bin_utils[args.bin_format],
    165                         f.name,
    166                         p.returncode,
    167                     )
    168                 )
    169 
    170 
    171 def process_file(args, relname, rel_to, s, out):
    172     fname = os.path.join(rel_to, relname)
    173     # s = os.lstat(fname)
    174     ftype = s.st_mode >> 12
    175     mode = s.st_mode & 07777
    176     t = ftypes[ftype]
    177 
    178     if not args.compact:
    179         out.write('\n')
    180 
    181     if '\t' in relname or '\n' in relname:
    182         out.statement('P\t' + relname, '\t')
    183     else:
    184         if args.compact:
    185             out.statement('/' + relname)
    186         else:
    187             out.statement('/' + relname, '\t')
    188 
    189     if ftype != DIR and s.st_nlink > 1:
    190         pass  # TODO: process hardlinks
    191 
    192     if ftype == BLOCK or ftype == CHAR:
    193         out.statement('%s%d:%d' % (t, s.st_rdev >> 8, s.st_rdev & 255))
    194     elif ftype == LINK:
    195         out.statement('l\t' + os.readlink(fname), '\t')
    196     elif ftype == FILE:
    197         if args.content:
    198             with open(fname, 'rb') as f:
    199                 process_content(args, f, s.st_size, out)
    200         else:
    201             out.statement('f')
    202     else:
    203         out.statement(t)
    204 
    205     if args.owner:
    206         out.statement('o%d:%d' % (s.st_uid, s.st_gid))
    207 
    208     if args.mode:
    209         out.statement('m%04o' % mode,)
    210 
    211     out.statement_end()
    212 
    213 
    214 def statwalk(root, top='', sort_func=sorted, exclude=None):
    215     names = os.listdir(os.path.join(root, top))
    216     stats = [os.lstat(os.path.join(root, top, name)) for name in names]
    217 
    218     for (name, s) in sort_func(zip(names, stats)):
    219         relname = os.path.join(top, name)
    220         if exclude is not None and exclude(root, top, name, s):
    221             continue
    222         yield (relname, s)
    223         if DIR == s.st_mode >> 12:
    224             # We don't have readdir() and directory filedescriptors available
    225             # so we will keep just appending to the path.
    226             for x in statwalk(root, relname):
    227                 yield x
    228 
    229 
    230 def process_root(args, root, out):
    231     for relname, s in statwalk(root):
    232         process_file(args, relname, root, s, out)
    233 
    234 
    235 def main(args):
    236     out = Output(sys.stdout)
    237 
    238     if not args.root:
    239         process_root(args, '.', out)
    240     else:
    241         for root in args.root:
    242             process_root(args, root, out)
    243 
    244 
# Script entry point: parse the command line and produce the listing.
if __name__ == '__main__':
    args = parser.parse_args()
    main(args)