cpio2fs (7499B)
1 #!/bin/zsh 2 # Reads cpio archive and emits fileset file 3 setopt no_unset warn_create_global no_multibyte 4 5 typeset -gA cpio_struct ftypes hardlinks 6 typeset -ga cpio_oldbin_fields 7 typeset -g delim filename ftype fmode 8 9 ftypes=( # convert hex type to mnemonic character 10 C s # socket 11 c s 12 A l # symbolic link 13 a l 14 8 f # regular file 15 6 b # block device 16 4 d # directory 17 2 u # character device 18 1 p # FIFO 19 ) 20 21 cpio_oldbin_fields=( 22 c_magic 6 23 c_dev 6 24 c_ino 6 25 c_mode 6 26 c_uid 6 27 c_gid 6 28 c_nlink 6 29 c_rdev 6 30 c_mtime 11 31 c_namesize 6 32 c_filesize 11 33 ) 34 35 ### Defaults for options passed by env {{{1 36 37 # TODO: make into command-line arguments 38 : ${compact:=1} 39 : ${print_m:=1} 40 : ${print_o:=1} 41 : ${print_c:=1} 42 if (($+commands[file])) && (($+commands[base64])); then 43 : ${print_b:=1} 44 else 45 : ${print_b:=0} 46 if (($+commands[file])) && (($+commands[xxd])); then 47 : ${print_x:=1} 48 fi 49 fi 50 : ${print_x:=0} 51 : ${max_bin_size:=} 52 : ${max_newline_size:=1024} 53 54 55 ### Generic helpers {{{1 56 57 die() { 58 printf '%s\n' "$@" 59 exit 1 60 } 61 62 63 ### cpio header reader functions {{{1 64 65 read_oldc_header() { 66 local field bytes 67 for field size in $cpio_oldbin_fields; do 68 IFS= read -k $size -u 0 bytes || die "Short read of cpio archive header" 69 [[ $bytes =~ '^[0-7]{'$size'}$' ]] \ 70 || die "Invalid octal header value: ${(qqq)bytes}" 71 cpio_struct[$field]=$(( [##8] $bytes )) 72 done 73 cpio_struct[rdev_major]=$(( $cpio_struct[rdev] >> 8 )) 74 cpio_struct[rdev_minor]=$(( $cpio_struct[rdev] & 255 )) 75 } 76 77 read_newc_header() { 78 local field bytes 79 cpio_struct=( ) 80 for field in c_ino c_mode c_uid c_gid c_nlink c_mtime c_filesize \ 81 c_devmajor c_devminor c_rdevmajor c_rdevminor c_namesize c_check 82 do 83 IFS= read -k 8 -u 0 bytes || die "Short read of cpio archive header" 84 [[ $bytes =~ '^[0-9a-fA-F]{8}$' ]] \ 85 || die "Invalid hexadecimal header value: ${(qqq)bytes}" 86 cpio_struct[$field]=$(( 0x$bytes )) 87 done 88 cpio_struct[c_dev]=$cpio_struct[c_devmajor]:$cpio_struct[c_devminor] 89 cpio_struct[path_padding]=$[ 3-((3+2+$cpio_struct[c_namesize])%4 ) ] 90 cpio_struct[data_padding]=$[ 3-((3+$cpio_struct[c_filesize])%4) ] 91 } 92 93 read_cpio_header() { 94 local magic_bytes 95 IFS= read -k 6 -u 0 magic_bytes || return $? 96 cpio_struct=( 97 c_magic $magic_bytes 98 path_padding 0 99 data_padding 0 100 ) 101 case "$cpio_struct[c_magic]" in 102 (070707) # oldc (aka Portable ASCII Format) 103 read_oldc_header;; 104 (070701) # newc (aka New ASCII Format) 105 read_newc_header;; 106 (070702) # crc (aka New CRC Format) is same in structure as newc 107 read_newc_header;; 108 (*) 109 die "Unknown cpio format number: ${(qqq)cpio_struct[c_magic]}";; 110 esac 111 } 112 113 ### FileSet writer functions {{{1 114 115 statement() { 116 # start on new line for multiline statements - more readable 117 if [[ -n "$delim" && $1 == *$'\n'* ]]; then 118 delim=$'\n' 119 fi 120 printf '%s%s' $delim ${1//$'\n'/$'\n\t'} 121 delim=${2:-$'\t'} 122 } 123 124 statement_end() { 125 printf '\n' 126 delim='' 127 } 128 129 130 process_file() { 131 local filename ftype fmode t padding link 132 IFS= read -k $[$cpio_struct[c_namesize]-1] -u 0 filename \ 133 || die "Could not read filename" 134 IFS= read -k $[1 + $cpio_struct[path_padding]] -u 0 padding \ 135 || die "Could not read filename" 136 if [[ $filename == "TRAILER!!!"* ]]; then 137 return 138 fi 139 ftype=$(( [##16] $cpio_struct[c_mode] >> 12 )) 140 fmode=$(( [##8] $cpio_struct[c_mode] & 8#7777 )) 141 t=$ftypes[$ftype] 142 #printf "%s %s %s\n" >&2 $t $fmode "${(qqq)filename}" 143 144 # --- print stuff --- 145 146 (($compact)) || printf '\n' 147 148 if [[ $filename == . ]]; then 149 filename=/ 150 elif [[ $filename == ./* ]]; then 151 filename=$filename[2,-1] 152 fi 153 if [[ $filename == *$'\t'* || $filename == *$'\n'* ]]; then 154 statement $'P\t'$filename $'\t' 155 else 156 if (($compact)); then 157 statement /${filename%/} 158 else 159 statement /${filename%/} $'\n' 160 fi 161 fi 162 163 # Note: not supporting hardlinks 164 165 if [[ $t == [bu] ]]; then 166 statement $t$cpio_struct[rdev_major]:$cpio_struct[rdev_minor] 167 (($cpio_struct[c_filesize])) && die "Unexpected data" 168 elif [[ $t == l ]]; then 169 (($cpio_struct[c_filesize])) || die "Missing data" 170 IFS= read -k $cpio_struct[c_filesize] -u 0 link \ 171 || die "Could not read symbolic link data" 172 statement $'l\t'$link $'\t' 173 elif [[ $t == f ]]; then 174 if (($print_c)); then 175 process_file_data $cpio_struct[c_filesize] 176 else 177 statement f 178 head -c$cpio_struct[c_filesize] >/dev/null \ 179 die "Could not read data" 180 fi 181 else 182 statement $t 183 (($cpio_struct[c_filesize])) && die "Unexpected data" 184 fi 185 186 (($print_o)) && statement o$cpio_struct[c_uid]:$cpio_struct[c_gid] 187 (($print_m)) && statement m$fmode 188 statement_end 189 190 # --- read padding --- 191 if (($cpio_struct[data_padding])); then 192 IFS= read -k $cpio_struct[data_padding] -u 0 padding \ 193 || die "Could not read padding" 194 # dd status=none count=1 bs=$cpio_struct[data_padding] | xxd 195 fi 196 } 197 198 prhead() { 199 local remaining 200 remaining=$1 201 shift 202 printf '%s' "$@" || exit $? 203 if (($remaining)); then 204 head -c$remaining || exit $? 205 fi 206 } 207 208 process_file_data() { 209 local size head remaining 210 local -a print_data 211 size=$1 212 if ! (($cpio_struct[c_filesize])); then 213 statement cN$'\t' # empty file 214 return 215 fi 216 217 if (($size > 256)); then # read up to 256 bytes to determine file type 218 IFS= read -k 256 -u 0 head || die "Could not read file data" 219 remaining=$[ $cpio_struct[c_filesize] - 256 ] 220 else 221 IFS= read -k $cpio_struct[c_filesize] -u 0 head \ 222 || die "Could not read file data" 223 remaining=0 224 fi 225 226 # print binary representation? 227 if (($print_b | $print_x)) && \ 228 [[ $(printf '%s' "$head" | file -bi -) != text/* ]] 229 then 230 if (($remaining)); then 231 if [[ -n $max_bin_size && $size -gt $max_bin_size ]]; then 232 statement s$'\tSHA512:'${"$(prhead $remaining "$head" | sha512sum)"%% *} \ 233 || die "Could not read data" 234 elif (($print_x)); then 235 if [[ -n "$delim" ]]; then 236 printf '\nX' 237 else 238 printf 'X' 239 fi 240 prhead $remaining "$head" | xxd | sed 's/^/\t/' 241 ((${(j.|.)pipestatus})) && die "Could not read data" 242 statement_end 243 else 244 if [[ -n "$delim" ]]; then 245 printf '\nB' 246 else 247 printf 'B' 248 fi 249 prhead $remaining "$head" | base64 | sed 's/^/\t/' 250 ((${(j.|.)pipestatus})) && die "Could not read data" 251 statement_end 252 fi 253 else 254 if [[ -n $max_bin_size && $size -gt $max_bin_size ]]; then 255 statement s$'\tSHA512:'${"$(printf '%s' "$head" | sha512sum)"%% *} 256 elif (($print_x)); then 257 statement X$'\t'"$(printf '%s' "$head" | xxd)" $'\n' 258 else 259 statement B$'\t'"$(printf '%s' "$head" | base64)" $'\n' 260 fi 261 fi 262 return 263 fi 264 # print text 265 266 # if file is longer than this, always use CN 267 if (($size > $max_newline_size)); then 268 if [[ -n "$delim" ]]; then 269 printf '\nCN\t' 270 else 271 printf 'CN\t' 272 fi 273 # Swap NL with @ so trailing newline is handled correctly 274 prhead $remaining "$head" \ 275 | tr '\n@' '@\n' \ 276 | sed 's/@/@\t/g' \ 277 | tr '@\n' '\n@' 278 ((${(j.|.)pipestatus})) && die "Could not read data" 279 statement_end 280 return 281 fi 282 283 # read and then print out, determinig trailing newline flags 284 local content flags 285 if (($remaining)); then 286 IFS= read -r -d '' -u 0 -k $remaining content \ 287 || die "Could not read file data" 288 fi 289 content=$head$content 290 flags='' 291 if [[ $content == *$'\n' ]]; then 292 content=${content%$'\n'} 293 if [[ $content == *$'\n' ]]; then 294 # force appending newline 295 flags+=n 296 fi 297 else 298 flags+=N 299 fi 300 if ! (($compact)) || [[ $content == *$'\t'* || $content == *$'\n'* ]]; then 301 statement C$flags$'\t'$content $'\n' 302 else 303 statement c$flags$'\t'$content 304 fi 305 } 306 307 ### Mainloop {{{1 308 while read_cpio_header; do 309 process_file 310 done