fileset

git mirror of https://ccx.te2000.cz/bzr/fileset
git clone https://ccx.te2000.cz/git/fileset
Log | Files | Refs | README

cpio2fs (7499B)


      1 #!/bin/zsh
      2 # Reads cpio archive and emits fileset file
      3 setopt no_unset warn_create_global no_multibyte
      4 
      5 typeset -gA cpio_struct ftypes hardlinks
      6 typeset -ga cpio_oldbin_fields
      7 typeset -g delim filename ftype fmode
      8 
      9 ftypes=(  # convert hex type to mnemonic character
     10 	C  s  # socket
     11 	c  s
     12 	A  l  # symbolic link
     13 	a  l
     14 	8  f  # regular file
     15 	6  b  # block device
     16 	4  d  # directory
     17 	2  u  # character device
     18 	1  p  # FIFO
     19 )
     20 
     21 cpio_oldbin_fields=(
     22 	c_magic 6
     23 	c_dev 6
     24 	c_ino 6
     25 	c_mode 6
     26 	c_uid 6
     27 	c_gid 6
     28 	c_nlink 6
     29 	c_rdev 6
     30 	c_mtime 11
     31 	c_namesize 6
     32 	c_filesize 11
     33 )
     34 
     35 ### Defaults for options passed by env {{{1
     36 
     37 # TODO: make into command-line arguments
     38 : ${compact:=1}
     39 : ${print_m:=1}
     40 : ${print_o:=1}
     41 : ${print_c:=1}
     42 if (($+commands[file])) && (($+commands[base64])); then
     43 	: ${print_b:=1}
     44 else
     45 	: ${print_b:=0}
     46 	if (($+commands[file])) && (($+commands[xxd])); then
     47 		: ${print_x:=1}
     48 	fi
     49 fi
     50 : ${print_x:=0}
     51 : ${max_bin_size:=}
     52 : ${max_newline_size:=1024}
     53 
     54 
     55 ### Generic helpers {{{1
     56 
     57 die() {
     58 	printf '%s\n' "$@"
     59 	exit 1
     60 }
     61 
     62 
     63 ### cpio header reader functions {{{1
     64 
     65 read_oldc_header() {
     66 	local field bytes
     67 	for field size in $cpio_oldbin_fields; do
     68 		IFS= read -k $size -u 0 bytes || die "Short read of cpio archive header"
     69 		[[ $bytes =~ '^[0-7]{'$size'}$' ]] \
     70 			|| die "Invalid octal header value: ${(qqq)bytes}"
     71 		cpio_struct[$field]=$(( [##8] $bytes ))
     72 	done
     73 	cpio_struct[rdev_major]=$(( $cpio_struct[rdev] >> 8 ))
     74 	cpio_struct[rdev_minor]=$(( $cpio_struct[rdev] & 255 ))
     75 }
     76 
     77 read_newc_header() {
     78 	local field bytes
     79 	cpio_struct=( )
     80 	for field in c_ino c_mode c_uid c_gid c_nlink c_mtime c_filesize \
     81 		c_devmajor c_devminor c_rdevmajor c_rdevminor c_namesize c_check
     82 	do
     83 		IFS= read -k 8 -u 0 bytes || die "Short read of cpio archive header"
     84 		[[ $bytes =~ '^[0-9a-fA-F]{8}$' ]] \
     85 			|| die "Invalid hexadecimal header value: ${(qqq)bytes}"
     86 		cpio_struct[$field]=$(( 0x$bytes ))
     87 	done
     88 	cpio_struct[c_dev]=$cpio_struct[c_devmajor]:$cpio_struct[c_devminor]
     89 	cpio_struct[path_padding]=$[ 3-((3+2+$cpio_struct[c_namesize])%4 ) ]
     90 	cpio_struct[data_padding]=$[ 3-((3+$cpio_struct[c_filesize])%4) ]
     91 }
     92 
     93 read_cpio_header() {
     94 	local magic_bytes
     95 	IFS= read -k 6 -u 0 magic_bytes || return $?
     96 	cpio_struct=(
     97 		c_magic $magic_bytes
     98 		path_padding 0
     99 		data_padding 0
    100 	)
    101 	case "$cpio_struct[c_magic]" in
    102 		(070707)  # oldc (aka Portable ASCII Format)
    103 			read_oldc_header;;
    104 		(070701)  # newc (aka New ASCII Format)
    105 			read_newc_header;;
    106 		(070702)  # crc (aka New CRC Format) is same in structure as newc
    107 			read_newc_header;;
    108 		(*)
    109 			die "Unknown cpio format number: ${(qqq)cpio_struct[c_magic]}";;
    110 	esac
    111 }
    112 
    113 ### FileSet writer functions {{{1
    114 
    115 statement() {
    116 	# start on new line for multiline statements - more readable
    117 	if [[ -n "$delim" && $1 == *$'\n'* ]]; then
    118 		delim=$'\n'
    119 	fi
    120 	printf '%s%s' $delim ${1//$'\n'/$'\n\t'}
    121 	delim=${2:-$'\t'}
    122 }
    123 
    124 statement_end() {
    125 	printf '\n'
    126 	delim=''
    127 }
    128 
    129 
    130 process_file() {
    131 	local filename ftype fmode t padding link
    132 	IFS= read -k $[$cpio_struct[c_namesize]-1] -u 0 filename \
    133 		|| die "Could not read filename"
    134 	IFS= read -k $[1 + $cpio_struct[path_padding]] -u 0 padding \
    135 		|| die "Could not read filename"
    136 	if [[ $filename == "TRAILER!!!"* ]]; then
    137 		return
    138 	fi
    139 	ftype=$(( [##16] $cpio_struct[c_mode] >> 12 ))
    140 	fmode=$(( [##8] $cpio_struct[c_mode] & 8#7777 ))
    141 	t=$ftypes[$ftype]
    142 	#printf "%s %s %s\n" >&2 $t $fmode "${(qqq)filename}"
    143 
    144 	# --- print stuff ---
    145 
    146 	(($compact)) || printf '\n'
    147 
    148 	if [[ $filename == . ]]; then
    149 		filename=/
    150 	elif [[ $filename == ./* ]]; then
    151 		filename=$filename[2,-1]
    152 	fi
    153 	if [[ $filename == *$'\t'* || $filename == *$'\n'* ]]; then
    154 		statement $'P\t'$filename $'\t'
    155 	else
    156 		if (($compact)); then
    157 			statement /${filename%/}
    158 		else
    159 			statement /${filename%/} $'\n'
    160 		fi
    161 	fi
    162 
    163 	# Note: not supporting hardlinks
    164 
    165 	if [[ $t == [bu] ]]; then
    166 		statement $t$cpio_struct[rdev_major]:$cpio_struct[rdev_minor]
    167 		(($cpio_struct[c_filesize])) && die "Unexpected data"
    168 	elif [[ $t == l ]]; then
    169 		(($cpio_struct[c_filesize])) || die "Missing data"
    170 		IFS= read -k $cpio_struct[c_filesize] -u 0 link \
    171 			|| die "Could not read symbolic link data"
    172 		statement $'l\t'$link $'\t'
    173 	elif [[ $t == f ]]; then
    174 		if (($print_c)); then
    175 			process_file_data $cpio_struct[c_filesize]
    176 		else
    177 			statement f
    178 			head -c$cpio_struct[c_filesize] >/dev/null \
    179 				die "Could not read data"
    180 		fi
    181 	else
    182 		statement $t
    183 		(($cpio_struct[c_filesize])) && die "Unexpected data"
    184 	fi
    185 
    186 	(($print_o)) && statement o$cpio_struct[c_uid]:$cpio_struct[c_gid]
    187 	(($print_m)) && statement m$fmode
    188 	statement_end
    189 
    190 	# --- read padding ---
    191 	if (($cpio_struct[data_padding])); then
    192 		IFS= read -k $cpio_struct[data_padding] -u 0 padding \
    193 			|| die "Could not read padding"
    194 		# dd status=none count=1 bs=$cpio_struct[data_padding] | xxd
    195 	fi
    196 }
    197 
    198 prhead() {
    199 	local remaining
    200 	remaining=$1
    201 	shift
    202 	printf '%s' "$@" || exit $?
    203 	if (($remaining)); then
    204 		head -c$remaining || exit $?
    205 	fi
    206 }
    207 
    208 process_file_data() {
    209 	local size head remaining
    210 	local -a print_data
    211 	size=$1
    212 	if ! (($cpio_struct[c_filesize])); then
    213 		statement cN$'\t'  # empty file
    214 		return
    215 	fi
    216 
    217 	if (($size > 256)); then  # read up to 256 bytes to determine file type
    218 		IFS= read -k 256 -u 0 head || die "Could not read file data"
    219 		remaining=$[ $cpio_struct[c_filesize] - 256 ]
    220 	else
    221 		IFS= read -k $cpio_struct[c_filesize] -u 0 head \
    222 			|| die "Could not read file data"
    223 		remaining=0
    224 	fi
    225 		
    226 	# print binary representation?
    227 	if (($print_b | $print_x)) && \
    228 		[[ $(printf '%s' "$head" | file -bi -) != text/* ]]
    229 	then
    230 		if (($remaining)); then
    231 			if [[ -n $max_bin_size && $size -gt $max_bin_size ]]; then
    232 				statement s$'\tSHA512:'${"$(prhead $remaining "$head" | sha512sum)"%% *} \
    233 					|| die "Could not read data"
    234 			elif (($print_x)); then
    235 				if [[ -n "$delim" ]]; then
    236 					printf '\nX'
    237 				else
    238 					printf 'X'
    239 				fi
    240 				prhead $remaining "$head" | xxd | sed 's/^/\t/'
    241 				((${(j.|.)pipestatus})) && die "Could not read data"
    242 				statement_end
    243 			else
    244 				if [[ -n "$delim" ]]; then
    245 					printf '\nB'
    246 				else
    247 					printf 'B'
    248 				fi
    249 				prhead $remaining "$head" | base64 | sed 's/^/\t/'
    250 				((${(j.|.)pipestatus})) && die "Could not read data"
    251 				statement_end
    252 			fi
    253 		else
    254 			if [[ -n $max_bin_size && $size -gt $max_bin_size ]]; then
    255 				statement s$'\tSHA512:'${"$(printf '%s' "$head" | sha512sum)"%% *}
    256 			elif (($print_x)); then
    257 				statement X$'\t'"$(printf '%s' "$head" | xxd)" $'\n'
    258 			else
    259 				statement B$'\t'"$(printf '%s' "$head" | base64)" $'\n'
    260 			fi
    261 		fi
    262 		return
    263 	fi
    264 	# print text
    265 
    266 	# if file is longer than this, always use CN
    267 	if (($size > $max_newline_size)); then
    268 		if [[ -n "$delim" ]]; then
    269 			printf '\nCN\t'
    270 		else
    271 			printf 'CN\t'
    272 		fi
    273 		# Swap NL with @ so trailing newline is handled correctly
    274 		prhead $remaining "$head" \
    275 			| tr '\n@' '@\n' \
    276 			| sed 's/@/@\t/g' \
    277 			| tr '@\n' '\n@'
    278 		((${(j.|.)pipestatus})) && die "Could not read data"
    279 		statement_end
    280 		return
    281 	fi
    282 
    283 	# read and then print out, determinig trailing newline flags
    284 	local content flags
    285 	if (($remaining)); then
    286 		IFS= read -r -d '' -u 0 -k $remaining content \
    287 			|| die "Could not read file data"
    288 	fi
    289 	content=$head$content
    290 	flags=''
    291 	if [[ $content == *$'\n' ]]; then
    292 		content=${content%$'\n'}
    293 		if [[ $content == *$'\n' ]]; then
    294 			# force appending newline
    295 			flags+=n
    296 		fi
    297 	else
    298 		flags+=N
    299 	fi
    300 	if ! (($compact)) || [[ $content == *$'\t'* || $content == *$'\n'* ]]; then
    301 		statement C$flags$'\t'$content $'\n'
    302 	else
    303 		statement c$flags$'\t'$content
    304 	fi
    305 }
    306 
    307 ### Mainloop {{{1
    308 while read_cpio_header; do
    309 	process_file
    310 done