#!/bin/zsh # Reads cpio archive and emits fileset file setopt no_unset warn_create_global no_multibyte typeset -gA cpio_struct ftypes hardlinks typeset -ga cpio_oldbin_fields typeset -g delim filename ftype fmode ftypes=( # convert hex type to mnemonic character C s # socket c s A l # symbolic link a l 8 f # regular file 6 b # block device 4 d # directory 2 u # character device 1 p # FIFO ) cpio_oldbin_fields=( c_magic 6 c_dev 6 c_ino 6 c_mode 6 c_uid 6 c_gid 6 c_nlink 6 c_rdev 6 c_mtime 11 c_namesize 6 c_filesize 11 ) ### Defaults for options passed by env {{{1 # TODO: make into command-line arguments : ${compact:=1} : ${print_m:=1} : ${print_o:=1} : ${print_c:=1} if (($+commands[file])) && (($+commands[base64])); then : ${print_b:=1} else : ${print_b:=0} if (($+commands[file])) && (($+commands[xxd])); then : ${print_x:=1} fi fi : ${print_x:=0} : ${max_bin_size:=} : ${max_newline_size:=1024} ### Generic helpers {{{1 die() { printf '%s\n' "$@" exit 1 } ### cpio header reader functions {{{1 read_oldc_header() { local field bytes for field size in $cpio_oldbin_fields; do IFS= read -k $size -u 0 bytes || die "Short read of cpio archive header" [[ $bytes =~ '^[0-7]{'$size'}$' ]] \ || die "Invalid octal header value: ${(qqq)bytes}" cpio_struct[$field]=$(( [##8] $bytes )) done cpio_struct[rdev_major]=$(( $cpio_struct[rdev] >> 8 )) cpio_struct[rdev_minor]=$(( $cpio_struct[rdev] & 255 )) } read_newc_header() { local field bytes cpio_struct=( ) for field in c_ino c_mode c_uid c_gid c_nlink c_mtime c_filesize \ c_devmajor c_devminor c_rdevmajor c_rdevminor c_namesize c_check do IFS= read -k 8 -u 0 bytes || die "Short read of cpio archive header" [[ $bytes =~ '^[0-9a-fA-F]{8}$' ]] \ || die "Invalid hexadecimal header value: ${(qqq)bytes}" cpio_struct[$field]=$(( 0x$bytes )) done cpio_struct[c_dev]=$cpio_struct[c_devmajor]:$cpio_struct[c_devminor] cpio_struct[path_padding]=$[ 3-((3+2+$cpio_struct[c_namesize])%4 ) ] cpio_struct[data_padding]=$[ 3-((3+$cpio_struct[c_filesize])%4) ] } read_cpio_header() { local magic_bytes IFS= read -k 6 -u 0 magic_bytes || return $? cpio_struct=( c_magic $magic_bytes path_padding 0 data_padding 0 ) case "$cpio_struct[c_magic]" in (070707) # oldc (aka Portable ASCII Format) read_oldc_header;; (070701) # newc (aka New ASCII Format) read_newc_header;; (070702) # crc (aka New CRC Format) is same in structure as newc read_newc_header;; (*) die "Unknown cpio format number: ${(qqq)cpio_struct[c_magic]}";; esac } ### FileSet writer functions {{{1 statement() { # start on new line for multiline statements - more readable if [[ -n "$delim" && $1 == *$'\n'* ]]; then delim=$'\n' fi printf '%s%s' $delim ${1//$'\n'/$'\n\t'} delim=${2:-$'\t'} } statement_end() { printf '\n' delim='' } process_file() { local filename ftype fmode t padding link IFS= read -k $[$cpio_struct[c_namesize]-1] -u 0 filename \ || die "Could not read filename" IFS= read -k $[1 + $cpio_struct[path_padding]] -u 0 padding \ || die "Could not read filename" if [[ $filename == "TRAILER!!!"* ]]; then return fi ftype=$(( [##16] $cpio_struct[c_mode] >> 12 )) fmode=$(( [##8] $cpio_struct[c_mode] & 8#7777 )) t=$ftypes[$ftype] #printf "%s %s %s\n" >&2 $t $fmode "${(qqq)filename}" # --- print stuff --- (($compact)) || printf '\n' if [[ $filename == . ]]; then filename=/ elif [[ $filename == ./* ]]; then filename=$filename[2,-1] fi if [[ $filename == *$'\t'* || $filename == *$'\n'* ]]; then statement $'P\t'$filename $'\t' else if (($compact)); then statement /${filename%/} else statement /${filename%/} $'\n' fi fi # Note: not supporting hardlinks if [[ $t == [bu] ]]; then statement $t$cpio_struct[rdev_major]:$cpio_struct[rdev_minor] (($cpio_struct[c_filesize])) && die "Unexpected data" elif [[ $t == l ]]; then (($cpio_struct[c_filesize])) || die "Missing data" IFS= read -k $cpio_struct[c_filesize] -u 0 link \ || die "Could not read symbolic link data" statement $'l\t'$link $'\t' elif [[ $t == f ]]; then if (($print_c)); then process_file_data $cpio_struct[c_filesize] else statement f head -c$cpio_struct[c_filesize] >/dev/null \ die "Could not read data" fi else statement $t (($cpio_struct[c_filesize])) && die "Unexpected data" fi (($print_o)) && statement o$cpio_struct[c_uid]:$cpio_struct[c_gid] (($print_m)) && statement m$fmode statement_end # --- read padding --- if (($cpio_struct[data_padding])); then IFS= read -k $cpio_struct[data_padding] -u 0 padding \ || die "Could not read padding" # dd status=none count=1 bs=$cpio_struct[data_padding] | xxd fi } prhead() { local remaining remaining=$1 shift printf '%s' "$@" || exit $? if (($remaining)); then head -c$remaining || exit $? fi } process_file_data() { local size head remaining local -a print_data size=$1 if ! (($cpio_struct[c_filesize])); then statement cN$'\t' # empty file return fi if (($size > 256)); then # read up to 256 bytes to determine file type IFS= read -k 256 -u 0 head || die "Could not read file data" remaining=$[ $cpio_struct[c_filesize] - 256 ] else IFS= read -k $cpio_struct[c_filesize] -u 0 head \ || die "Could not read file data" remaining=0 fi # print binary representation? if (($print_b | $print_x)) && \ [[ $(printf '%s' "$head" | file -bi -) != text/* ]] then if (($remaining)); then if [[ -n $max_bin_size && $size -gt $max_bin_size ]]; then statement s$'\tSHA512:'${"$(prhead $remaining "$head" | sha512sum)"%% *} \ || die "Could not read data" elif (($print_x)); then if [[ -n "$delim" ]]; then printf '\nX' else printf 'X' fi prhead $remaining "$head" | xxd | sed 's/^/\t/' ((${(j.|.)pipestatus})) && die "Could not read data" statement_end else if [[ -n "$delim" ]]; then printf '\nB' else printf 'B' fi prhead $remaining "$head" | base64 | sed 's/^/\t/' ((${(j.|.)pipestatus})) && die "Could not read data" statement_end fi else if [[ -n $max_bin_size && $size -gt $max_bin_size ]]; then statement s$'\tSHA512:'${"$(printf '%s' "$head" | sha512sum)"%% *} elif (($print_x)); then statement X$'\t'"$(printf '%s' "$head" | xxd)" $'\n' else statement B$'\t'"$(printf '%s' "$head" | base64)" $'\n' fi fi return fi # print text # if file is longer than this, always use CN if (($size > $max_newline_size)); then if [[ -n "$delim" ]]; then printf '\nCN\t' else printf 'CN\t' fi # Swap NL with @ so trailing newline is handled correctly prhead $remaining "$head" \ | tr '\n@' '@\n' \ | sed 's/@/@\t/g' \ | tr '@\n' '\n@' ((${(j.|.)pipestatus})) && die "Could not read data" statement_end return fi # read and then print out, determinig trailing newline flags local content flags if (($remaining)); then IFS= read -r -d '' -u 0 -k $remaining content \ || die "Could not read file data" fi content=$head$content flags='' if [[ $content == *$'\n' ]]; then content=${content%$'\n'} if [[ $content == *$'\n' ]]; then # force appending newline flags+=n fi else flags+=N fi if ! (($compact)) || [[ $content == *$'\t'* || $content == *$'\n'* ]]; then statement C$flags$'\t'$content $'\n' else statement c$flags$'\t'$content fi } ### Mainloop {{{1 while read_cpio_header; do process_file done