#!/bin/zsh
# Shell behaviour for the whole script, one option per line:
setopt no_unset            # expanding an unset parameter is an error
setopt warn_create_global  # warn when a function assigns an undeclared global
setopt no_multibyte        # handle strings as bytes, not multibyte characters
zmodload zsh/stat          # supplies the zstat builtin used by process_file

# Associative arrays.  "s" is the hash process_file has zstat fill in;
# "ftypes" is populated right below.  "hardlinks" is only declared in the
# code visible here.
typeset -gA ftypes
typeset -gA hardlinks
typeset -gA s
# Scalars.  "delim" is the pending separator kept by statement/statement_end;
# "find" and "fname" are assigned by the read in the main loop.
typeset -g delim find fname ftype fmode

# Lookup table: hex file-type digit -> one-letter type mnemonic.
# Digits that are letters are listed in both upper and lower case.
# NOTE(review): nothing in the code visible here reads this table.
ftypes=(
	C s   c s    # socket
	A l   a l    # symbolic link
	8 f          # regular file
	6 b          # block device
	4 d          # directory
	2 u          # character device
	1 p          # FIFO
)

### Defaults for options passed by env {{{1

# TODO: make into command-line arguments
# Every option below keeps a value inherited from the environment and only
# falls back to the default given here when it is unset or empty.
: ${compact:=1} ${print_m:=1} ${print_o:=1} ${print_c:=1} ${print_s:=1}

# Binary dumps need file(1) plus an encoder: base64 is preferred, xxd is the
# fallback default when base64 is not installed.
if (($+commands[file])) && (($+commands[base64])); then
	: ${print_b:=1}
elif (($+commands[file])) && (($+commands[xxd])); then
	: ${print_b:=0} ${print_x:=1}
else
	: ${print_b:=0}
fi
: ${print_x:=0}

# Size thresholds (bytes): no binary limit by default; text larger than
# max_newline_size is always written in the streamed CN form.
: ${max_bin_size:=}
: ${max_newline_size:=1024}

### Generic helpers {{{1

# Print every argument on its own line to stderr, then exit with status 1.
# The message must go to stderr: stdout carries the FileSet being generated,
# and an error message written there would be stored as part of it.
die() {
	printf '%s\n' "$@" >&2
	exit 1
}

### FileSet writer functions {{{1

# Write one statement of the current entry, preceded by the pending delimiter.
#   $1 - statement text; embedded newlines are continued with a leading tab
#   $2 - delimiter to place before the NEXT statement (default: tab)
# Reads and updates the global "delim".
statement() {
	local text=$1
	case $text in
		*$'\n'*)
			# A multiline statement that is not first in the entry starts
			# on a fresh line - more readable.
			[[ -z "$delim" ]] || delim=$'\n'
			;;
	esac
	printf '%s%s' "$delim" "${text//$'\n'/$'\n\t'}"
	delim=${2:-$'\t'}
}

# Finish the current entry: terminate its line and clear the pending
# delimiter so the next statement starts without a separator.
statement_end() {
	printf '%s' $'\n'
	delim=
}


# Emit one FileSet entry for the file named by the global $fname.
#
# Arguments:
#   $1 - metadata record "<type> <mode> <uid>:<gid> <size>", i.e. the
#        space-separated part of the find -printf format in the main loop
#   $2 - the path as passed by the caller; not read here, the path is
#        taken from the global $fname
# Globals read:    fname, compact, print_c, print_s, print_o, print_m
# Globals written: s (hash filled by zstat), delim (through statement)
# Output: the entry on stdout.  Exits through die when a stat or a read of
# the file fails.
process_file() {
	local -a find_info
	local t fmode owner size filename digest
	find_info=( $=1 )
	t=$find_info[1]
	fmode=$find_info[2]
	owner=$find_info[3]
	size=$find_info[4]

	# find's %y reports a character device as "c".  In the output "c" is the
	# content statement, and the device branch below (like the ftypes table)
	# uses "u" for character devices, so translate before dispatching.
	if [[ $t == c ]]; then
		t=u
	fi

	# --- print stuff ---

	# non-compact output separates entries with a blank line
	(($compact)) || printf '\n'

	# Normalise the path: "." is the root, a leading "./" is dropped, and
	# exactly one leading "/" is put back when the name is printed.
	filename="$fname"
	if [[ $filename == . ]]; then
		filename=/
	elif [[ $filename == ./* ]]; then
		filename=$filename[2,-1]
	fi
	filename=${filename#/}

	# printf "%s %s %s\n" >&2 $t $fmode "${(qqq)filename}"

	if [[ $filename == *$'\t'* || $filename == *$'\n'* ]]; then
		# names containing the separators themselves use the P statement
		statement $'P\t'$filename $'\t'
	else
		if (($compact)); then
			statement /${filename}
		else
			statement /${filename} $'\n'
		fi
	fi

	# Note: not supporting hardlinks (yet)

	if [[ $t == [bu] ]]; then
		# device node: type letter followed by major:minor taken from rdev
		zstat -LH s $fname || die "stat failed on ${(qqq)fname}"
		statement $t$(( $s[rdev] >> 8 )):$(( $s[rdev] & 255 ))
	elif [[ $t == l ]]; then
		# symbolic link: record the link target
		zstat -LH s $fname || die "stat failed on ${(qqq)fname}"
		statement $'l\t'$s[link] $'\t'
	elif [[ $t == f ]]; then
		if (($print_c)); then
			process_file_data $size
		else
			statement f
			if (($print_s)); then
				# Capture the digest first: statement itself always returns
				# 0, so a read failure has to be detected here.
				digest="$(sha512sum <$fname)" \
					|| die "Could not read ${(qqq)fname}"
				statement s$'\tSHA512:'${digest%% *}
			fi
		fi
	else
		statement $t
	fi

	(($print_o)) && statement o$owner
	(($print_m)) && statement m$fmode
	statement_end
}

# Emit the content statement(s) for the regular file named by the global
# $fname.
#
# Arguments:
#   $1 - file size in bytes as reported by find
# Globals read: fname, delim, compact, print_b, print_x, max_bin_size,
#               max_newline_size
# Output forms:
#   cN<TAB>            empty file
#   s<TAB>SHA512:<hex> non-text file larger than max_bin_size (digest only)
#   X... / B...        non-text file as xxd / base64 dump
#   CN<TAB>...         text larger than max_newline_size, streamed
#   c/C[flags]<TAB>... small text; flag N = no trailing newline,
#                      flag n = content itself still ends in a newline
# Exits through die when the file cannot be read.
process_file_data() {
	local size digest encoder letter dump content flags
	size=$1
	if ! (($size)); then
		statement cN$'\t'  # empty file
		return
	fi

	# print binary representation?
	if (($print_b | $print_x)) && \
		[[ $(file -bi "$fname") != text/* ]]
	then
		if [[ -n $max_bin_size && $size -gt $max_bin_size ]]; then
			# Too large to embed: store the digest only.  The digest is
			# captured first because statement always returns 0 and would
			# hide a read failure.
			digest="$(sha512sum <$fname)" \
				|| die "Could not read ${(qqq)fname}"
			statement s$'\tSHA512:'${digest%% *}
			return
		fi

		# xxd wins when print_x is set, otherwise base64
		if (($print_x)); then
			encoder=xxd letter=X
		else
			encoder=base64 letter=B
		fi

		if (($size > 256)); then
			# Stream the dump instead of holding it in a variable; every
			# dump line is prefixed with a tab.
			if [[ -n "$delim" ]]; then
				printf '\n'
			fi
			printf '%s' $letter
			$encoder <$fname | sed 's/^/\t/'
			((${(j.|.)pipestatus})) && die "Could not read ${(qqq)fname}"
			statement_end
		else
			dump="$($encoder <$fname)" \
				|| die "Could not read ${(qqq)fname}"
			statement $letter$'\t'$dump $'\n'
		fi
		return
	fi
	# print text

	# if file is longer than this, always use CN
	if (($size > $max_newline_size)); then
		if [[ -n "$delim" ]]; then
			printf '\nCN\t'
		else
			printf 'CN\t'
		fi
		# Swap NL with @ so trailing newline is handled correctly
		tr <$fname '\n@' '@\n' \
			| sed 's/@/@\t/g' \
			| tr '@\n' '\n@'
		((${(j.|.)pipestatus})) && die "Could not read ${(qqq)fname}"
		statement_end
		return
	fi

	# Read and then print out, determining trailing newline flags.
	# Command substitution drops ALL trailing newlines, which would make the
	# checks below pointless, so a sentinel "x" is appended inside the
	# substitution and removed again afterwards.
	content="$(cat <$fname && printf x)" \
		|| die "Could not read ${(qqq)fname}"
	content=${content%x}
	flags=''
	if [[ $content == *$'\n' ]]; then
		content=${content%$'\n'}
		if [[ $content == *$'\n' ]]; then
			# force appending newline
			flags+=n
		fi
	else
		flags+=N
	fi
	if ! (($compact)) || [[ $content == *$'\t'* || $content == *$'\n'* ]]; then
		statement C$flags$'\t'$content $'\n'
	else
		statement c$flags$'\t'$content
	fi
}

### Mainloop {{{1
# Optionally work relative to $ROOT so the recorded paths do not contain it.
if (($+ROOT)) && [[ -n $ROOT ]]; then
	cd $ROOT || exit $?
fi

# find emits NUL-terminated records "<type> <mode> <uid>:<gid> <size>\t<path>";
# sort orders them by path (everything after the first tab).
#
# Each record is read whole with an empty IFS and split at the FIRST tab by
# hand.  Letting read split on IFS=$'\t' treats tab as IFS whitespace, which
# strips trailing tabs from the path - and paths containing tabs are
# explicitly supported by process_file.
find "$@" -printf '%y %m %U:%G %s\t%p\0' \
	| sort -z -t $'\t' -k 2 \
	| while IFS= read -r -d $'\0' find
do
	fname=${find#*$'\t'}   # path: everything after the first tab
	find=${find%%$'\t'*}   # metadata: everything before it
	process_file "$find" "$fname"
done