#!/bin/zsh
# Parse command-line flags.  -D removes the recognised options from the
# positional parameters.  Each flag lands in its own array, which is
# non-empty exactly when the flag was given:
#   E  -0 / --empty      P  -p / --pretend     H  -h / --help
#   V  -v / --verbose    D  -d / --debug
# Both spellings of an option must target the same array, otherwise the
# long form is silently ignored by the rest of the script.
zparseopts -D \
	0=E -empty=E \
	p=P -pretend=P \
	h=H -help=H \
	v=V -verbose=V \
	d=D -debug=D

# $V is passed verbatim to ln and mv; normalise it to the short flag so the
# same argument is used no matter which spelling (or how many) was given.
(($#V)) && V=(-v)

# Print usage and stop when help was requested.
if [[ -n $H ]]; then cat <<END
usage: ${0:t} [options]

Deduplicate data using hardlinks using a report generated by duff or fdupes

options:
	-0 --empty          allow hardlinking empty files
	-p --pretend        print out the commands that would be performed
	-v --verbose        pass verbose flags to ln and mv
	-d --debug          enable debugging output
	-h --help           this help
END
	exit 0
fi

# Trace every executed command when -d/--debug was given.
(($#D)) && set -x

zmodload zsh/stat
# stat:   fields of the file currently being examined (filled by zstat -H)
# inodes: device/inode keys already seen in the current duplicate group
# metas:  metadata signature -> first path seen with it in the current group
typeset -A stat inodes metas

# Read the duplicate report from stdin.  A line starting with "." or "/" is
# treated as a path belonging to the current group of identical files; any
# other line (blank line, header, ...) ends the group.
# -r keeps backslashes in file names literal, and the trailing test makes
# sure a final line without a newline is still processed.
while IFS= read -r line || [[ -n $line ]]; do
	if [[ $line == [./]* ]]; then
		# got relative or absolute path; -L stats the entry itself (lstat)
		zstat -LH stat $line || continue

		# skip empty files unless -0/--empty was given
		(( $stat[size] || $#E )) || continue

		# skip paths that already share an inode with a file seen in this group
		ino=D$stat[device]_I$stat[inode]
		(( $+inodes[$ino] )) && continue

		# metadata that must be identical so hardlinking does not change
		# anything observable about the file besides its link count
		meta=D$stat[device]_P$stat[mode]_U$stat[uid]_G$stat[gid]_M$stat[mtime]
		if (( $+metas[$meta] )); then
			# link under a temporary name next to the target, then rename over
			# it, so the original path is never missing
			tmp=$line:h/.tmp.$$.$line:t
			if [[ -e $tmp ]]; then
				print -ru2 -- "Temporary file $tmp already exists!"
				continue
			fi
			if (($#P)); then
				# print -r: do not interpret backslash escapes in file names
				print -r -- ln $V ${(qqq)metas[$meta]} ${(qqq)tmp} '&&' \
					mv $V ${(qqq)tmp} ${(qqq)line}
			else
				ln $V $metas[$meta] $tmp || exit $?
				if ! mv $V $tmp $line; then
					# do not leave the temporary hardlink behind on failure
					rm -f -- $tmp
					exit 1
				fi
			fi
		else
			# first file with this inode/signature in the group: remember it
			# as the link source for later matches
			inodes[$ino]=$line
			metas[$meta]=$line
		fi
	else
		# end of same-file block: forget everything known about the group so
		# files from different groups (different content) are never linked
		inodes=( )
		metas=( )
	fi
done