#!/bin/zsh
# Reads a cpio archive from stdin and emits a fileset file on stdout
setopt no_unset warn_create_global no_multibyte

# Global state shared by the functions below.
typeset -gA cpio_struct ftypes hardlinks
typeset -ga cpio_oldbin_fields
typeset -g delim filename ftype fmode

# Maps the file type nibble of c_mode (rendered as one hex digit) to the
# mnemonic character used in the output.  Types above 9 are listed in both
# letter cases.
ftypes=(
	1 p       # FIFO
	2 u       # character device
	4 d       # directory
	6 b       # block device
	8 f       # regular file
	A l  a l  # symbolic link
	C s  c s  # socket
)

# Header layout of the old portable ASCII format: alternating field name and
# field width in characters, in the order the fields appear in the header.
cpio_oldbin_fields=(
	c_magic     6
	c_dev       6
	c_ino       6
	c_mode      6
	c_uid       6
	c_gid       6
	c_nlink     6
	c_rdev      6
	c_mtime    11
	c_namesize  6
	c_filesize 11
)
### Defaults for options passed by env {{{1
# TODO: make into command-line arguments
# A non-empty value inherited from the environment always wins; the values
# below are only filled in for options that are unset or empty.
#   compact          - 1: keep each entry on a single line where possible
#   print_m          - 1: emit the permission bits statement (m...)
#   print_o          - 1: emit the owner statement (o<uid>:<gid>)
#   print_c          - 1: emit file contents; 0: emit only the type marker
#   print_b, print_x - emit non-text contents as base64 / as an xxd dump
#   max_bin_size     - non-text files larger than this are replaced by a
#                      SHA512 digest (empty: no limit)
#   max_newline_size - text files larger than this are streamed as CN
: ${compact:=1} ${print_m:=1} ${print_o:=1} ${print_c:=1}

# Binary output needs file(1) for type detection plus an encoder: prefer
# base64 and fall back to xxd when base64 is not installed.
if (( ${+commands[file]} && ${+commands[base64]} )); then
	: ${print_b:=1}
elif (( ${+commands[file]} && ${+commands[xxd]} )); then
	: ${print_b:=0} ${print_x:=1}
else
	: ${print_b:=0}
fi
: ${print_x:=0}
: ${max_bin_size:=}
: ${max_newline_size:=1024}
### Generic helpers {{{1
# die MESSAGE...
# Prints every MESSAGE argument on its own line and terminates the script
# with exit status 1.  The messages go to stderr: stdout carries the
# generated fileset and must not be mixed with diagnostics.
die() {
	printf '%s\n' "$@" >&2
	exit 1
}
### cpio header reader functions {{{1
# read_oldc_header
# Reads the remainder of an old portable ASCII ("oldc") header from stdin
# and stores every field in the global cpio_struct as a decimal integer.
# The field names and widths come from cpio_oldbin_fields; each field is a
# fixed-width string of octal digits.  Also derives rdev_major/rdev_minor
# from c_rdev.  Dies on a short read or on a non-octal field.
read_oldc_header() {
	local field size bytes
	for field size in $cpio_oldbin_fields; do
		# read_cpio_header has already consumed the magic number; reading it
		# again would shift every following field by six bytes
		[[ $field == c_magic ]] && continue
		IFS= read -k $size -u 0 bytes || die "Short read of cpio archive header"
		[[ $bytes =~ '^[0-7]{'$size'}$' ]] \
			|| die "Invalid octal header value: ${(qqq)bytes}"
		# 8#... parses the digits as octal ([##8] would only select the
		# output base and treat the digits as decimal)
		cpio_struct[$field]=$(( 8#$bytes ))
	done
	cpio_struct[rdev_major]=$(( $cpio_struct[c_rdev] >> 8 ))
	cpio_struct[rdev_minor]=$(( $cpio_struct[c_rdev] & 255 ))
}
# read_newc_header
# Reads the remainder of a new ASCII ("newc"/"crc") header from stdin: 13
# fields of 8 hexadecimal digits each, stored in the global cpio_struct as
# decimal integers.  Additionally sets:
#   c_dev                  - "<devmajor>:<devminor>"
#   rdev_major, rdev_minor - copies of c_rdevmajor/c_rdevminor under the
#                            names process_file uses for device nodes
#   path_padding           - bytes following the NUL-terminated name
#   data_padding           - bytes following the file data
# Both paddings round up to a multiple of 4; the "2" in the path formula is
# the header length (6 + 13*8 = 110) modulo 4.
# Dies on a short read or on a non-hexadecimal field.
read_newc_header() {
	local field bytes magic
	# keep the magic number recorded by the caller across the reset
	magic=${cpio_struct[c_magic]-}
	cpio_struct=( c_magic "$magic" )
	for field in c_ino c_mode c_uid c_gid c_nlink c_mtime c_filesize \
		c_devmajor c_devminor c_rdevmajor c_rdevminor c_namesize c_check
	do
		IFS= read -k 8 -u 0 bytes || die "Short read of cpio archive header"
		[[ $bytes =~ '^[0-9a-fA-F]{8}$' ]] \
			|| die "Invalid hexadecimal header value: ${(qqq)bytes}"
		cpio_struct[$field]=$(( 0x$bytes ))
	done
	cpio_struct[c_dev]=$cpio_struct[c_devmajor]:$cpio_struct[c_devminor]
	cpio_struct[rdev_major]=$cpio_struct[c_rdevmajor]
	cpio_struct[rdev_minor]=$cpio_struct[c_rdevminor]
	cpio_struct[path_padding]=$(( 3 - (3 + 2 + $cpio_struct[c_namesize]) % 4 ))
	cpio_struct[data_padding]=$(( 3 - (3 + $cpio_struct[c_filesize]) % 4 ))
}
# read_cpio_header
# Reads the next entry header from stdin into the global cpio_struct.
# The 6-byte magic number selects the header format; the format-specific
# reader then fills in the remaining fields.  path_padding and data_padding
# start out as 0 and are only changed by formats that align their records.
# Returns non-zero when no further header is available (end of input, or
# the NUL padding that follows the last entry); dies on an unknown magic.
read_cpio_header() {
	local magic_bytes
	IFS= read -k 6 -u 0 magic_bytes || return $?
	# Archives are usually padded with NUL bytes up to the block size after
	# the trailer entry; that padding is the end of the archive, not a header.
	if [[ $magic_bytes == $'\0\0\0\0\0\0' ]]; then
		return 1
	fi
	cpio_struct=(
		c_magic "$magic_bytes"
		path_padding 0
		data_padding 0
	)
	case "$cpio_struct[c_magic]" in
		(070707) # oldc (aka Portable ASCII Format)
			read_oldc_header;;
		(070701) # newc (aka New ASCII Format)
			read_newc_header;;
		(070702) # crc (aka New CRC Format) is same in structure as newc
			read_newc_header;;
		(*)
			die "Unknown cpio format number: ${(qqq)cpio_struct[c_magic]}";;
	esac
}
### FileSet writer functions {{{1
# statement TEXT [NEXT_DELIM]
# Writes one statement of the current entry to stdout, preceded by the
# pending delimiter held in the global delim.  Newlines inside TEXT are
# followed by a tab so that continuation lines are indented.  Afterwards
# delim is set to NEXT_DELIM (a tab when omitted or empty).
statement() {
	local text=$1 next_delim=${2:-$'\t'}
	# A multiline statement reads better when it starts on a line of its
	# own, unless nothing has been written on the current line yet.
	[[ -z "$delim" || $text != *$'\n'* ]] || delim=$'\n'
	printf '%s%s' "$delim" "${text//$'\n'/$'\n\t'}"
	delim=$next_delim
}
# statement_end
# Terminates the current output line and clears the pending delimiter so
# that the next statement starts at the beginning of a line.
statement_end() {
	printf '\n'
	delim=
}
# process_file
# Handles the archive entry whose header is currently in cpio_struct:
# reads the entry name (and, depending on the type, its data) from stdin
# and writes the corresponding statements to stdout.
#   - the name is read as c_namesize-1 bytes followed by the terminating
#     NUL and path_padding bytes of alignment
#   - an entry whose name starts with "TRAILER!!!" produces no output
#   - "." is written as "/", a leading "." of "./name" is dropped
#   - names containing a tab or newline are written as a P statement
#   - block/character devices carry <major>:<minor>, symbolic links their
#     target, regular files their content (or just "f" when print_c is 0)
#   - owner and mode statements follow when print_o / print_m are set
#   - finally data_padding bytes after the data are skipped
# Dies on read errors, on an unknown file type, and when the presence of
# data does not match the file type.
process_file() {
	local filename ftype fmode t padding link
	IFS= read -k $(( $cpio_struct[c_namesize] - 1 )) -u 0 filename \
		|| die "Could not read filename"
	IFS= read -k $(( 1 + $cpio_struct[path_padding] )) -u 0 padding \
		|| die "Could not read filename padding"
	if [[ $filename == "TRAILER!!!"* ]]; then
		return
	fi
	ftype=$(( [##16] $cpio_struct[c_mode] >> 12 ))
	fmode=$(( [##8] $cpio_struct[c_mode] & 8#7777 ))
	# look the type up defensively: an unknown type would otherwise only
	# surface as a "parameter not set" error because of no_unset
	t=${ftypes[$ftype]-}
	[[ -n $t ]] || die "Unknown file type ${(qqq)ftype}: ${(qqq)filename}"
	#printf "%s %s %s\n" >&2 $t $fmode "${(qqq)filename}"
	# --- print stuff ---
	(($compact)) || printf '\n'
	if [[ $filename == . ]]; then
		filename=/
	elif [[ $filename == ./* ]]; then
		filename=$filename[2,-1]
	fi
	if [[ $filename == *$'\t'* || $filename == *$'\n'* ]]; then
		statement $'P\t'$filename $'\t'
	else
		# Drop a leading slash before adding our own, so that "./foo"
		# (turned into "/foo" above) is not written as "//foo".
		if (($compact)); then
			statement /${${filename#/}%/}
		else
			statement /${${filename#/}%/} $'\n'
		fi
	fi
	# Note: not supporting hardlinks
	if [[ $t == [bu] ]]; then
		statement $t$cpio_struct[rdev_major]:$cpio_struct[rdev_minor]
		(($cpio_struct[c_filesize])) && die "Unexpected data"
	elif [[ $t == l ]]; then
		(($cpio_struct[c_filesize])) || die "Missing data"
		IFS= read -k $cpio_struct[c_filesize] -u 0 link \
			|| die "Could not read symbolic link data"
		statement $'l\t'$link $'\t'
	elif [[ $t == f ]]; then
		if (($print_c)); then
			process_file_data $cpio_struct[c_filesize]
		else
			statement f
			# skip over the file data without printing it
			head -c$cpio_struct[c_filesize] >/dev/null \
				|| die "Could not read data"
		fi
	else
		statement $t
		(($cpio_struct[c_filesize])) && die "Unexpected data"
	fi
	(($print_o)) && statement o$cpio_struct[c_uid]:$cpio_struct[c_gid]
	(($print_m)) && statement m$fmode
	statement_end
	# --- read padding ---
	if (($cpio_struct[data_padding])); then
		IFS= read -k $cpio_struct[data_padding] -u 0 padding \
			|| die "Could not read padding"
		# dd status=none count=1 bs=$cpio_struct[data_padding] | xxd
	fi
}
# prhead COUNT [STRING...]
# Writes the STRING arguments back to back (no separator, no newline) and
# then copies the next COUNT bytes of stdin to stdout.  Used to re-join an
# already read prefix of a file with the rest of its data.  Exits the
# current (sub)shell with the failing status if printf or head fails.
prhead() {
	local tail_len=$1
	shift
	printf '%s' "$@" || exit $?
	# nothing left on stdin to copy
	(($tail_len)) || return 0
	head -c$tail_len || exit $?
}
# process_file_data SIZE
# Reads SIZE bytes of regular-file data from stdin and writes the matching
# content statement:
#   cN         - empty file
#   s SHA512:… - non-text data larger than max_bin_size (digest only)
#   X / B      - non-text data as an xxd dump / base64 (only attempted when
#                print_x or print_b is set; the type is guessed by file(1)
#                from the first 256 bytes)
#   CN         - text larger than max_newline_size, streamed as is
#   C / c      - smaller text; flag N is added when the text does not end
#                with a newline, flag n when a newline remains after
#                stripping one trailing newline.  C is used for multiline
#                or tab-containing text and whenever compact is 0.
# Dies when the data cannot be read or a helper pipeline fails.
process_file_data() {
	local size head remaining digest content flags
	size=$1
	if ! (($size)); then
		statement cN$'\t' # empty file
		return
	fi
	if (($size > 256)); then # read up to 256 bytes to determine file type
		IFS= read -k 256 -u 0 head || die "Could not read file data"
		remaining=$(( $size - 256 ))
	else
		IFS= read -k $size -u 0 head \
			|| die "Could not read file data"
		remaining=0
	fi
	# print binary representation?
	if (($print_b | $print_x)) && \
		[[ $(printf '%s' "$head" | file -bi -) != text/* ]]
	then
		if (($remaining)); then
			if [[ -n $max_bin_size && $size -gt $max_bin_size ]]; then
				# check the pipeline itself: statement never fails, so a
				# test attached to it could not notice a failed sha512sum
				digest=$(prhead $remaining "$head" | sha512sum) \
					|| die "Could not read data"
				statement s$'\tSHA512:'${digest%% *}
			elif (($print_x)); then
				if [[ -n "$delim" ]]; then
					printf '\nX'
				else
					printf 'X'
				fi
				prhead $remaining "$head" | xxd | sed 's/^/\t/'
				((${(j.|.)pipestatus})) && die "Could not read data"
				statement_end
			else
				if [[ -n "$delim" ]]; then
					printf '\nB'
				else
					printf 'B'
				fi
				prhead $remaining "$head" | base64 | sed 's/^/\t/'
				((${(j.|.)pipestatus})) && die "Could not read data"
				statement_end
			fi
		else
			# the whole file is already in $head
			if [[ -n $max_bin_size && $size -gt $max_bin_size ]]; then
				digest=$(printf '%s' "$head" | sha512sum) \
					|| die "Could not compute SHA512 digest"
				statement s$'\tSHA512:'${digest%% *}
			elif (($print_x)); then
				statement X$'\t'"$(printf '%s' "$head" | xxd)" $'\n'
			else
				statement B$'\t'"$(printf '%s' "$head" | base64)" $'\n'
			fi
		fi
		return
	fi
	# print text
	# if file is longer than this, always use CN
	if (($size > $max_newline_size)); then
		if [[ -n "$delim" ]]; then
			printf '\nCN\t'
		else
			printf 'CN\t'
		fi
		# Swap NL with @ so trailing newline is handled correctly
		prhead $remaining "$head" \
			| tr '\n@' '@\n' \
			| sed 's/@/@\t/g' \
			| tr '@\n' '\n@'
		((${(j.|.)pipestatus})) && die "Could not read data"
		statement_end
		return
	fi
	# read and then print out, determining trailing newline flags
	if (($remaining)); then
		IFS= read -r -d '' -u 0 -k $remaining content \
			|| die "Could not read file data"
	fi
	content=$head$content
	flags=''
	if [[ $content == *$'\n' ]]; then
		content=${content%$'\n'}
		if [[ $content == *$'\n' ]]; then
			# force appending newline
			flags+=n
		fi
	else
		flags+=N
	fi
	if ! (($compact)) || [[ $content == *$'\t'* || $content == *$'\n'* ]]; then
		statement C$flags$'\t'$content $'\n'
	else
		statement c$flags$'\t'$content
	fi
}
### Mainloop {{{1
# Convert one archive entry per iteration until no further header can be
# read from stdin.
while true; do
	read_cpio_header || break
	process_file
done