fileset

git mirror of https://ccx.te2000.cz/bzr/fileset
git clone https://ccx.te2000.cz/git/fileset
Log | Files | Refs | README

commit 870d74697f374b90cf5ba3643cdb61c6cd572b3e
parent 4b5aa00a59b0bfae501233813f47e23d4087d15d
Author: Jan Pobrislo <ccx@webprojekty.cz>
Date:   Fri, 15 Dec 2017 01:49:11 +0100

fslist implementations based on cpio and GNU find/sort
Diffstat:
M README | 5 ++++-
M bin/cpio2fs | 246 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
M bin/fslist | 5 ++++-
A bin/fslist.pax | 6 ++++++
A bin/fslist2 | 6 ++++++
A bin/fslist3 | 218 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M bin/fsvimdiff | 9 +++++++++
7 files changed, 459 insertions(+), 36 deletions(-)

diff --git a/README b/README @@ -50,7 +50,7 @@ h<flags><tab><destination> c<flags><tab><content> create plain file with specified content (content may not contain tab, see C) - a leading newline is appended if the content does not end in newline, + a final newline is appended if the content does not end in newline, unless following flags are given: "n" - always append a newline "N" - never append a newline @@ -58,6 +58,9 @@ c<flags><tab><content> b<flags><tab><content> base64, TODO description +s<flags><tab><content> + checksum, TODO description + Commands that take whole statement: P<tab><path> diff --git a/bin/cpio2fs b/bin/cpio2fs @@ -3,6 +3,7 @@ setopt no_unset warn_create_global no_multibyte typeset -gA cpio_struct ftypes hardlinks +typeset -ga cpio_oldbin_fields typeset -g delim filename ftype fmode ftypes=( # convert hex type to mnemonic character @@ -17,6 +18,19 @@ ftypes=( # convert hex type to mnemonic character 1 p # FIFO ) +cpio_oldbin_fields=( + c_magic 6 + c_dev 6 + c_ino 6 + c_mode 6 + c_uid 6 + c_gid 6 + c_nlink 6 + c_rdev 6 + c_mtime 11 + c_namesize 6 + c_filesize 11 +) ### Defaults for options passed by env {{{1 @@ -34,6 +48,8 @@ else fi fi : ${print_x:=0} +: ${max_bin_size:=} +: ${max_newline_size:=1024} ### Generic helpers {{{1 @@ -46,48 +62,43 @@ die() { ### cpio header reader functions {{{1 -read_newc() { - local field - for field size in - c_magic 6 \ - c_dev 6 \ - c_ino 6 \ - c_mode 6 \ - c_uid 6 \ - c_gid 6 \ - c_nlink 6 \ - c_rdev 6 \ - c_mtime 11 \ - c_namesize 6 \ - c_filesize 11 - do - read -k size || die "Short read of cpio archive header" - [[ $REPLY =~ '^[0-7]{'$size'}$' ]] \ - || die "Invalid octal header value: ${(qqq)REPLY}" - cpio_struct[$field]=$(( [##8] $REPLY )) +read_oldc_header() { + local field bytes + for field size in $cpio_oldbin_fields; do + IFS= read -k $size -u 0 bytes || die "Short read of cpio archive header" + [[ $bytes =~ '^[0-7]{'$size'}$' ]] \ + || die "Invalid octal header value: ${(qqq)bytes}" + 
cpio_struct[$field]=$(( [##8] $bytes )) done - cpio_struct[rdev_major]=$(( $s[rdev] >> 8 )) - cpio_struct[rdev_minor]=$(( $s[rdev] & 255 )) + cpio_struct[rdev_major]=$(( $cpio_struct[rdev] >> 8 )) + cpio_struct[rdev_minor]=$(( $cpio_struct[rdev] & 255 )) } -read_newc() { - local field +read_newc_header() { + local field bytes cpio_struct=( ) for field in c_ino c_mode c_uid c_gid c_nlink c_mtime c_filesize \ c_devmajor c_devminor c_rdevmajor c_rdevminor c_namesize c_check do - read -k 8 || die "Short read of cpio archive header" - [[ $REPLY =~ '^[0-7a-fA-F]{8}$' ]] \ - || die "Invalid hexadecimal header value: ${(qqq)REPLY}" - cpio_struct[$field]=$(( [##16] $REPLY )) + IFS= read -k 8 -u 0 bytes || die "Short read of cpio archive header" + [[ $bytes =~ '^[0-9a-fA-F]{8}$' ]] \ + || die "Invalid hexadecimal header value: ${(qqq)bytes}" + cpio_struct[$field]=$(( 0x$bytes )) done cpio_struct[c_dev]=$cpio_struct[c_devmajor]:$cpio_struct[c_devminor] + cpio_struct[path_padding]=$[ 3-((3+2+$cpio_struct[c_namesize])%4 ) ] + cpio_struct[data_padding]=$[ 3-((3+$cpio_struct[c_filesize])%4) ] } read_cpio_header() { - read -k 6 || return $? - cpio_struct=( c_magic $REPLY ) - case "$c_magic" in + local magic_bytes + IFS= read -k 6 -u 0 magic_bytes || return $? 
+ cpio_struct=( + c_magic $magic_bytes + path_padding 0 + data_padding 0 + ) + case "$cpio_struct[c_magic]" in (070707) # oldc (aka Portable ASCII Format) read_oldc_header;; (070701) # newc (aka New ASCII Format) @@ -95,7 +106,7 @@ read_cpio_header() { (070702) # crc (aka New CRC Format) is same in structure as newc read_newc_header;; (*) - die "Unknown cpio format number: ${(qqq)c_magic}";; + die "Unknown cpio format number: ${(qqq)cpio_struct[c_magic]}";; esac } @@ -103,7 +114,7 @@ read_cpio_header() { statement() { # start on new line for multiline statements - more readable - if [[ -n delim && $1 == *$'\n'* ]]; then + if [[ -n "$delim" && $1 == *$'\n'* ]]; then delim=$'\n' fi printf '%s%s' $delim ${1//$'\n'/$'\n\t'} @@ -117,14 +128,181 @@ statement_end() { process_file() { - local filename ftype fmode t - filename="$(dd bs=$cpio_struct[c_namesize] count=1 | tr -d '\0')" \ + local filename ftype fmode t padding link + IFS= read -k $[$cpio_struct[c_namesize]-1] -u 0 filename \ || die "Could not read filename" + IFS= read -k $[1 + $cpio_struct[path_padding]] -u 0 padding \ + || die "Could not read filename" + if [[ $filename == "TRAILER!!!"* ]]; then + return + fi ftype=$(( [##16] $cpio_struct[c_mode] >> 12 )) fmode=$(( [##8] $cpio_struct[c_mode] & 8#7777 )) t=$ftypes[$ftype] + #printf "%s %s %s\n" >&2 $t $fmode "${(qqq)filename}" + + # --- print stuff --- + + (($compact)) || printf '\n' + + if [[ $filename == . 
]]; then + filename=/ + elif [[ $filename == ./* ]]; then + filename=$filename[2,-1] + fi + if [[ $filename == *$'\t'* || $filename == *$'\n'* ]]; then + statement $'P\t'$filename $'\t' + else + if (($compact)); then + statement /${filename%/} + else + statement /${filename%/} $'\n' + fi + fi + + # Note: not supporting hardlinks + + if [[ $t == [bu] ]]; then + statement $t$cpio_struct[rdev_major]:$cpio_struct[rdev_minor] + (($cpio_struct[c_filesize])) && die "Unexpected data" + elif [[ $t == l ]]; then + (($cpio_struct[c_filesize])) || die "Missing data" + IFS= read -k $cpio_struct[c_filesize] -u 0 link \ + || die "Could not read symbolic link data" + statement $'l\t'$link $'\t' + elif [[ $t == f ]]; then + if (($print_c)); then + process_file_data $cpio_struct[c_filesize] + else + statement f + head -c$cpio_struct[c_filesize] >/dev/null \ + die "Could not read data" + fi + else + statement $t + (($cpio_struct[c_filesize])) && die "Unexpected data" + fi + + (($print_o)) && statement o$cpio_struct[c_uid]:$cpio_struct[c_gid] + (($print_m)) && statement m$fmode + statement_end + + # --- read padding --- + if (($cpio_struct[data_padding])); then + IFS= read -k $cpio_struct[data_padding] -u 0 padding \ + || die "Could not read padding" + # dd status=none count=1 bs=$cpio_struct[data_padding] | xxd + fi } +prhead() { + local remaining + remaining=$1 + shift + printf '%s' "$@" || exit $? + if (($remaining)); then + head -c$remaining || exit $? + fi +} + +process_file_data() { + local size head remaining + local -a print_data + size=$1 + if ! (($cpio_struct[c_filesize])); then + statement cN$'\t' # empty file + return + fi + + if (($size > 256)); then # read up to 256 bytes to determine file type + IFS= read -k 256 -u 0 head || die "Could not read file data" + remaining=$[ $cpio_struct[c_filesize] - 256 ] + else + IFS= read -k $cpio_struct[c_filesize] -u 0 head \ + || die "Could not read file data" + remaining=0 + fi + + # print binary representation? 
+ if (($print_b | $print_x)) && \ + [[ $(printf '%s' "$head" | file -bi -) != text/* ]] + then + if (($remaining)); then + if [[ -n $max_bin_size && $size -gt $max_bin_size ]]; then + statement s$'\tSHA512:'${"$(prhead $remaining "$head" | sha512sum)"%% *} \ + || die "Could not read data" + elif (($print_x)); then + if [[ -n "$delim" ]]; then + printf '\nX' + else + printf 'X' + fi + prhead $remaining "$head" | xxd | sed 's/^/\t/' + ((${(j.|.)pipestatus})) && die "Could not read data" + statement_end + else + if [[ -n "$delim" ]]; then + printf '\nB' + else + printf 'B' + fi + prhead $remaining "$head" | base64 | sed 's/^/\t/' + ((${(j.|.)pipestatus})) && die "Could not read data" + statement_end + fi + else + if [[ -n $max_bin_size && $size -gt $max_bin_size ]]; then + statement s$'\tSHA512:'${"$(printf '%s' "$head" | sha512sum)"%% *} + elif (($print_x)); then + statement X$'\t'"$(printf '%s' "$head" | xxd)" $'\n' + else + statement B$'\t'"$(printf '%s' "$head" | base64)" $'\n' + fi + fi + return + fi + # print text + + # if file is longer than this, always use CN + if (($size > $max_newline_size)); then + if [[ -n "$delim" ]]; then + printf '\nCN\t' + else + printf 'CN\t' + fi + # Swap NL with @ so trailing newline is handled correctly + prhead $remaining "$head" \ + | tr '\n@' '@\n' \ + | sed 's/@/@\t/g' \ + | tr '@\n' '\n@' + ((${(j.|.)pipestatus})) && die "Could not read data" + statement_end + return + fi + + # read and then print out, determinig trailing newline flags + local content flags + if (($remaining)); then + IFS= read -r -d '' -u 0 -k $remaining content \ + || die "Could not read file data" + fi + content=$head$content + flags='' + if [[ $content == *$'\n' ]]; then + content=${content%$'\n'} + if [[ $content == *$'\n' ]]; then + # force appending newline + flags+=n + fi + else + flags+=N + fi + if ! 
(($compact)) || [[ $content == *$'\t'* || $content == *$'\n'* ]]; then + statement C$flags$'\t'$content $'\n' + else + statement c$flags$'\t'$content + fi +} ### Mainloop {{{1 while read_cpio_header; do diff --git a/bin/fslist b/bin/fslist @@ -48,6 +48,7 @@ else fi fi : ${print_x:=0} +: ${max_bin_size:=} fnames=( ) for arg in "${@:-$ROOT}"; do @@ -96,7 +97,9 @@ for fname in $fnames; do if (($print_c)); then if (($s[size])) && (($print_b + $print_x)) && \ [[ $(file -bi $fname) != text/* ]]; then - if (($print_x)); then + if [[ -n $max_bin_size && $s[size] -gt $max_bin_size ]]; then + statement s$'\tSHA512:'${"$(sha512sum < $fname)"%% *} + elif (($print_x)); then statement X$'\t'"$(xxd $fname)" $'\n' else statement B$'\t'"$(base64 <$fname)" $'\n' diff --git a/bin/fslist.pax b/bin/fslist.pax @@ -0,0 +1,6 @@ +#!/bin/zsh +if [[ -n $ROOT ]]; then + cd $ROOT || exit $? +fi +pax -w -x sv4cpio "$@" | cpio2fs +exit $((${(j.|.)pipestatus})) diff --git a/bin/fslist2 b/bin/fslist2 @@ -0,0 +1,6 @@ +#!/bin/zsh +if [[ -n $ROOT ]]; then + cd $ROOT || exit $? 
+fi +find "$@" -print0 | sort -z | cpio -o -0 -H newc | cpio2fs +exit $((${(j.|.)pipestatus})) diff --git a/bin/fslist3 b/bin/fslist3 @@ -0,0 +1,218 @@ +#!/bin/zsh +setopt no_unset warn_create_global no_multibyte +zmodload zsh/stat + +typeset -gA ftypes hardlinks s +typeset -g delim find fname ftype fmode + +ftypes=( # convert hex type to mnemonic character + C s # socket + c s + A l # symbolic link + a l + 8 f # regular file + 6 b # block device + 4 d # directory + 2 u # character device + 1 p # FIFO +) + +### Defaults for options passed by env {{{1 + +# TODO: make into command-line arguments +: ${compact:=1} +: ${print_m:=1} +: ${print_o:=1} +: ${print_c:=1} +: ${print_s:=1} +if (($+commands[file])) && (($+commands[base64])); then + : ${print_b:=1} +else + : ${print_b:=0} + if (($+commands[file])) && (($+commands[xxd])); then + : ${print_x:=1} + fi +fi +: ${print_x:=0} +: ${max_bin_size:=} +: ${max_newline_size:=1024} + +### Generic helpers {{{1 + +die() { + printf '%s\n' "$@" + exit 1 +} + +### FileSet writer functions {{{1 + +statement() { + # start on new line for multiline statements - more readable + if [[ -n "$delim" && $1 == *$'\n'* ]]; then + delim=$'\n' + fi + printf '%s%s' $delim ${1//$'\n'/$'\n\t'} + delim=${2:-$'\t'} +} + +statement_end() { + printf '\n' + delim='' +} + + +process_file() { + local -a find_info + local t fmode owner size filename + find_info=( $=1 ) + t=$find_info[1] + fmode=$find_info[2] + owner=$find_info[3] + size=$find_info[4] + + # --- print stuff --- + + (($compact)) || printf '\n' + + filename="$fname" + if [[ $filename == . 
]]; then + filename=/ + elif [[ $filename == ./* ]]; then + filename=$filename[2,-1] + fi + filename=${filename#/} + + # printf "%s %s %s\n" >&2 $t $fmode "${(qqq)filename}" + + if [[ $filename == *$'\t'* || $filename == *$'\n'* ]]; then + statement $'P\t'$filename $'\t' + else + if (($compact)); then + statement /${filename} + else + statement /${filename} $'\n' + fi + fi + + # Note: not supporting hardlinks (yet) + + if [[ $t == [bu] ]]; then + zstat -LH s $fname || die "stat failed on ${(qqq)fname}" + statement $t$(( $s[rdev] >> 8 )):$(( $s[rdev] & 255 )) + elif [[ $t == l ]]; then + zstat -LH s $fname || die "stat failed on ${(qqq)fname}" + statement $'l\t'$s[link] $'\t' + elif [[ $t == f ]]; then + if (($print_c)); then + process_file_data $size + else + statement f + if (($print_s)); then + statement s$'\tSHA512:'${"$(sha512sum <$fname)"%% *} \ + || die "Could not read ${(qqq)fname}" + fi + fi + else + statement $t + fi + + (($print_o)) && statement o$owner + (($print_m)) && statement m$fmode + statement_end +} + +process_file_data() { + local size + size=$1 + if ! (($size)); then + statement cN$'\t' # empty file + return + fi + + # print binary representation? 
+ if (($print_b | $print_x)) && \ + [[ $(file -bi "$fname") != text/* ]] + then + if (($size > 256)); then + if [[ -n $max_bin_size && $size -gt $max_bin_size ]]; then + statement s$'\tSHA512:'${"$(sha512sum <$fname)"%% *} \ + || die "Could not read ${(qqq)fname}" + elif (($print_x)); then + if [[ -n "$delim" ]]; then + printf '\nX' + else + printf 'X' + fi + xxd <$fname | sed 's/^/\t/' + ((${(j.|.)pipestatus})) && die "Could not read ${(qqq)fname}" + statement_end + else + if [[ -n "$delim" ]]; then + printf '\nB' + else + printf 'B' + fi + base64 <$fname | sed 's/^/\t/' + ((${(j.|.)pipestatus})) && die "Could not read ${(qqq)fname}" + statement_end + fi + else + if [[ -n $max_bin_size && $size -gt $max_bin_size ]]; then + statement s$'\tSHA512:'${"$(sha512sum <$fname)"%% *} + elif (($print_x)); then + statement X$'\t'"$(xxd <$fname)" $'\n' + else + statement B$'\t'"$(base64 <$fname)" $'\n' + fi + fi + return + fi + # print text + + # if file is longer than this, always use CN + if (($size > $max_newline_size)); then + if [[ -n "$delim" ]]; then + printf '\nCN\t' + else + printf 'CN\t' + fi + # Swap NL with @ so trailing newline is handled correctly + tr <$fname '\n@' '@\n' \ + | sed 's/@/@\t/g' \ + | tr '@\n' '\n@' + ((${(j.|.)pipestatus})) && die "Could not read ${(qqq)fname}" + statement_end + return + fi + + # read and then print out, determinig trailing newline flags + local content flags + content="$(<$fname)" + flags='' + if [[ $content == *$'\n' ]]; then + content=${content%$'\n'} + if [[ $content == *$'\n' ]]; then + # force appending newline + flags+=n + fi + else + flags+=N + fi + if ! (($compact)) || [[ $content == *$'\t'* || $content == *$'\n'* ]]; then + statement C$flags$'\t'$content $'\n' + else + statement c$flags$'\t'$content + fi +} + +### Mainloop {{{1 +if (($+ROOT)) && [[ -n $ROOT ]]; then + cd $ROOT || exit $? 
+fi + +find "$@" -printf '%y %m %U:%G %s\t%p\0' \ + | sort -z -t $'\t' -k 2 \ + | while IFS=$'\t' read -r -d $'\0' find fname +do + process_file "$find" "$fname" +done diff --git a/bin/fsvimdiff b/bin/fsvimdiff @@ -13,3 +13,12 @@ exec vim \ +"setlocal $opts" \ +"exe 'file '.\$ROOT2.'.fs'" \ +'norm ggzM' +#exec vim \ +# +'0r!ROOT="$ROOT1" fslist2 .' \ +# +"setlocal $opts" \ +# +"exe 'file '.\$ROOT1.'.fs'" \ +# +'rightb vnew' \ +# +'0r!ROOT="$ROOT2" fslist2 .' \ +# +"setlocal $opts" \ +# +"exe 'file '.\$ROOT2.'.fs'" \ +# +'norm ggzM'