#!/bin/bash
# Re-writing to clean out the kludge (hopefully)
# 02/11/2011
# Find files with identical MD5 checksums below a path and interactively
# delete or hardlink the duplicates.
#
# Usage: <script> [PATH [SHOW_EMPTY]]
#   PATH        directory tree (or file) to scan; prompted for when omitted
#   SHOW_EMPTY  "n" hides the group of zero-length files; any other value
#               shows it; prompted for when omitted
#
# All state is kept in shell arrays, so no temporary files are created and
# file names containing whitespace are handled intact.
#
# TODO: an optional third argument ("Hardlink all? (y/n)") was planned in the
# original script but never implemented.

# MD5 checksum of zero-length input.
EMPTY_MD5=d41d8cd98f00b204e9800998ecf8427e

# Remaining candidates of the duplicate group currently being resolved.
# g_nums[i] is the number shown to the user for g_files[i]; numbers are not
# reassigned when an entry is removed.
g_nums=()
g_files=()

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Leave the script the way the "Q" menu entry does.
quit() {
  echo "Exiting..."
  exit 0
}

# Print the inode number of file $1 (prints nothing if ls cannot list it).
inode_of() {
  local ino _
  read -r ino _ < <(ls -di -- "$1")
  printf '%s' "$ino"
}

# Print the disk usage of $1 in kilobytes (0 if du yields no figure).
tree_size_kb() {
  local kb _
  read -r kb _ < <(du -sk -- "$1")
  printf '%s' "${kb:-0}"
}

# Print the array position of the entry numbered $1; status 1 if absent.
pos_of() {
  local i
  for i in "${!g_nums[@]}"; do
    if [[ "${g_nums[i]}" == "$1" ]]; then
      printf '%s' "$i"
      return 0
    fi
  done
  return 1
}

# Remove the entry at array position $1 from the current group.
drop_entry() {
  local pos=$1
  g_nums=("${g_nums[@]:0:pos}" "${g_nums[@]:pos+1}")
  g_files=("${g_files[@]:0:pos}" "${g_files[@]:pos+1}")
}

# Succeed when every remaining file is the same file as the first one.
# -ef compares device and inode, so equal inode numbers on different
# filesystems are not mistaken for hardlinks.
all_hardlinked() {
  local f
  for f in "${g_files[@]:1}"; do
    [[ "${g_files[0]}" -ef "$f" ]] || return 1
  done
  return 0
}

# Replace file $2 with a hardlink to file $1.
# The link is first created under a temporary name next to $2 and then
# renamed over it, so $2 is left untouched if linking fails (for example
# when the two files live on different filesystems).
hardlink_pair() {
  local src=$1 dst=$2 tmp
  if [[ "$src" -ef "$dst" ]]; then
    echo "Already hardlinked: $dst"
    return 0
  fi
  tmp="$dst.hardlink.$$"
  if ! ln -- "$src" "$tmp"; then
    printf 'Could not hardlink %s; nothing was changed.\n' "$dst" >&2
    return 1
  fi
  if ! mv -f -- "$tmp" "$dst"; then
    /bin/rm -f -- "$tmp"
    printf 'Could not replace %s; nothing was changed.\n' "$dst" >&2
    return 1
  fi
}

# Interactively resolve one group of duplicates.
# Arguments: $1 - the shared MD5 checksum, $2... - the files carrying it.
# Loops until fewer than two candidates remain, all remaining candidates are
# hardlinks of each other, or the user chooses to keep them or quit.
resolve_group() {
  local sum=$1
  shift
  local f i n=0 answer pick pos first second p1 p2 target

  g_nums=()
  g_files=()
  for f in "$@"; do
    n=$((n + 1))
    g_nums+=("$n")
    g_files+=("$f")
  done

  printf '\n%s files have MD5SUM %s\n' "$#" "$sum"

  while (( ${#g_files[@]} >= 2 )); do
    if all_hardlinked; then
      echo "These are all already hardlinked!"
      return 0
    fi

    # Inodes are looked up on every pass so the listing reflects links
    # made in earlier passes.
    for i in "${!g_files[@]}"; do
      printf '%s: %s\t%s\n' \
        "${g_nums[i]}" "$(inode_of "${g_files[i]}")" "${g_files[i]}"
    done
    printf '%s\n' \
      '' \
      '    d) Delete a file' \
      '    D) Delete ALL' \
      '    k) Keep/continue ' \
      '    h) Hardlink a file' \
      '    Q) Quit' \
      '     '
    printf '%s' '    What would you like to do? '
    # End of input is treated as Quit; otherwise the menu would repeat forever.
    read -r answer || quit

    case "$answer" in
      d)
        printf '%s' '   Delete which file #? '
        read -r pick || quit
        pos=$(pos_of "$pick")
        if [[ -z "$pos" ]]; then
          echo "No file numbered '$pick'."
        else
          target=${g_files[pos]}
          /bin/rm -i -- "$target"
          # rm -i may have been declined; only report what really happened.
          if [[ -e "$target" ]]; then
            echo "Kept $target"
          else
            echo "Deleted $target"
            drop_entry "$pos"
          fi
        fi
        ;;
      D)
        for f in "${g_files[@]}"; do
          /bin/rm -i -- "$f"
        done
        g_nums=()
        g_files=()
        ;;
      k)
        echo "Keeping/continuing..."
        g_nums=()
        g_files=()
        ;;
      h)
        if (( ${#g_files[@]} == 2 )); then
          # Use the two entries that are left, whatever their numbers are.
          p1=0
          p2=1
        else
          first=
          second=
          while [[ -z "$first" || -z "$second" ]]; do
            printf '%s' 'Hardlink which two files? (# #) '
            read -r first second _ || quit
          done
          p1=$(pos_of "$first")
          p2=$(pos_of "$second")
        fi
        if [[ -z "$p1" || -z "$p2" || "$p1" == "$p2" ]]; then
          echo "No such pair of files."
        else
          # The first file is kept; the second becomes a link to it.
          hardlink_pair "${g_files[p1]}" "${g_files[p2]}"
        fi
        ;;
      Q)
        quit
        ;;
      *)
        echo "Learn to type!"
        ;;
    esac
  done
}

# Hand a group to resolve_group if it really is a set of duplicates.
# Arguments: $1 - MD5 checksum, $2... - files carrying it (possibly none).
handle_group() {
  # $1 is the checksum, so at least three arguments mean two or more files.
  (( $# > 2 )) || return 0
  if [[ "$EMPTY" == n && "$1" == "$EMPTY_MD5" ]]; then
    return 0
  fi
  resolve_group "$@"
}

# Entry point: read the arguments (or prompt), scan, resolve, report.
main() {
  local -a all_files=() all_sums=() group=()
  local file out sum idx i prev=''

  if [[ -n "${1:-}" ]]; then
    WKPTH=$1
  else
    printf '%s' 'What path? '
    read -r WKPTH
  fi
  if [[ -n "${2:-}" ]]; then
    EMPTY=$2
  else
    printf '%s' 'Show empty? (y/n) '
    read -r EMPTY
  fi

  [[ -n "$WKPTH" && -e "$WKPTH" ]] || die "No such path: '$WKPTH'"
  # Keep find from reading a leading dash as one of its own options.
  [[ "$WKPTH" == -* ]] && WKPTH=./$WKPTH

  CURSIZE=$(tree_size_kb "$WKPTH")

  echo "This might take a while: Please be patient."

  # Checksum every regular file. Names arrive NUL-delimited so whitespace in
  # them survives; files that cannot be read are skipped silently.
  while IFS= read -r -d '' file; do
    out=$(md5sum -- "$file" 2>/dev/null) || continue
    sum=${out%% *}
    # md5sum prefixes the line with a backslash when it had to escape the name.
    sum=${sum#\\}
    (( ${#sum} == 32 )) || continue
    all_files+=("$file")
    all_sums+=("$sum")
  done < <(find "$WKPTH" -type f -print0 2>/dev/null)

  # Walk "checksum index" pairs sorted by checksum, so equal checksums are
  # adjacent and files keep their discovery order within a group. The list is
  # read on descriptor 3 to leave stdin free for the interactive prompts.
  while read -r sum idx <&3; do
    if [[ "$sum" != "$prev" ]]; then
      handle_group "$prev" "${group[@]}"
      group=()
      prev=$sum
    fi
    group+=("${all_files[idx]}")
  done 3< <(
    for i in "${!all_sums[@]}"; do
      printf '%s %s\n' "${all_sums[i]}" "$i"
    done | LC_ALL=C sort -k1,1 -k2,2n
  )
  handle_group "$prev" "${group[@]}"

  echo "Done."
  NEWSIZE=$(tree_size_kb "$WKPTH")
  echo "$WKPTH was $CURSIZE which was reduced to $NEWSIZE"
  # Both figures are kilobytes, so the saving is their difference.
  SAVED=$(( CURSIZE - NEWSIZE ))
  echo "This saved you: ${SAVED}KB"
}

main "$@"












