#!/bin/zsh
# Parse command-line options; -D removes the recognised options from "$@".
# Every option stores into an array that is non-empty only when the option
# was given:
#   E  -0 / --empty      P  -p / --pretend     H  -h / --help
#   V  -v / --verbose    D  -d / --debug
# Both spellings of an option must target the same array, otherwise the
# long form is accepted but silently ignored by the rest of the script.
zparseopts -D \
  0=E -empty=E \
  p=P -pretend=P \
  h=H -help=H \
  v=V -verbose=V \
  d=D -debug=D
# -h/--help: print the usage text and leave successfully without doing work.
if (( $#H )); then
  cat <<END
usage: ${0:t} [options]
Deduplicate data using hardlinks using a report generated by duff or fdupes
options:
-0 --empty allow hardlinking empty files
-p --pretend print out the commands that would be performed
-v --verbose pass verbose flags to ln and mv
-d --debug enable debugging output
-h --help this help
END
  exit 0
fi
# -d/--debug: trace every command from here on.
if (( $#D )); then
  set -x
fi
# zstat (used in the main loop) is provided by the zsh/stat module.
zmodload zsh/stat
# stat   - fields of the most recent zstat -H call
# inodes - device/inode keys already seen in the current group
# metas  - metadata key -> path of the file other files are linked to
typeset -A stat inodes metas
# Main loop: read the duplicate report from stdin, one entry per line.
# A line starting with '.' or '/' is a path belonging to the current group of
# identical files; any other line ends the group. Within a group the first
# file seen for a given device/mode/uid/gid/mtime combination is kept, and
# every later file with the same combination is replaced by a hardlink to it
# (link to a temporary name, then rename over the original).

# ln/mv on BSD/macOS only accept the short -v, so normalise whichever
# spelling of the verbose option was given.
vflag=()
(( $#V )) && vflag=(-v)

# -r keeps backslashes in file names intact; the extra test handles a final
# line that is not newline-terminated.
while IFS= read -r line || [[ -n $line ]]; do
  if [[ $line != [./]* ]]; then
    # end of same-file block: forget everything known about the previous
    # group, otherwise files with different content but equal metadata
    # would be linked together
    inodes=( )
    metas=( )
    continue
  fi

  # got relative or absolute path
  zstat -LH stat $line || continue
  # skip empty files unless -0/--empty was given
  (( $stat[size] || $#E )) || continue
  # skip inodes we handled already (file is hardlinked to a known one)
  ino=D$stat[device]_I$stat[inode]
  (( $+inodes[$ino] )) && continue

  # metadata that has to be the same so we can hardlink
  meta=D$stat[device]_P$stat[mode]_U$stat[uid]_G$stat[gid]_M$stat[mtime]
  if (( ! $+metas[$meta] )); then
    # first file with this metadata in the group: remember it as link target
    inodes[$ino]=$line
    metas[$meta]=$line
    continue
  fi

  # link under a temporary name next to the file, then rename over it, so
  # the original is only replaced once the new link exists
  tmp=${line:h}/.tmp.$$.${line:t}
  if [[ -e $tmp || -L $tmp ]]; then
    echo >&2 "Temporary file $tmp already exists!"
    continue
  fi

  if (( $#P )); then
    # print -r: do not let backslashes produced by the quoting be
    # interpreted as escape sequences
    print -r -- ln $vflag ${(qqq)metas[$meta]} ${(qqq)tmp} '&&' \
      mv $vflag ${(qqq)tmp} ${(qqq)line}
  else
    ln $vflag $metas[$meta] $tmp || exit $?
    # do not leave the temporary link behind when the rename fails
    mv $vflag $tmp $line || { rc=$?; rm -f -- $tmp; exit $rc; }
  fi
done