diff options
author | Ben Sima <ben@bensima.com> | 2025-04-18 12:52:41 -0400 |
---|---|---|
committer | Ben Sima <ben@bensima.com> | 2025-04-18 12:52:41 -0400 |
commit | a6d62663e20fb5d1359845017787c85c4b10746f (patch) | |
tree | 3d949ed83abacb80c2ac0c50a6f52ef5087febb8 | |
parent | 7cf00fd9819f663cd411f1079cbbd22127ec7941 (diff) |
mail duplicates one-off script
-rwxr-xr-x | find-mail-duplicates.sh | 50 | ||||
-rwxr-xr-x | fix-duplicate-mail-uids.sh | 86 | ||||
-rwxr-xr-x | fix-mail-duplicates.sh | 97 |
3 files changed, 147 insertions, 86 deletions
diff --git a/find-mail-duplicates.sh b/find-mail-duplicates.sh
new file mode 100755
index 0000000..c8ae097
--- /dev/null
+++ b/find-mail-duplicates.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+# find-mail-duplicates.sh
+# Usage: find-mail-duplicates.sh /path/to/maildir
+#
+# Prints one "MAILDIR UID" line for every UID that is carried by more than one
+# message filename (the ",U=<n>" tag) under the given maildir. Each line holds
+# the two arguments that fix-mail-duplicates.sh expects.
+
+set -euo pipefail
+
+# Check if a maildir path was provided
+if [ $# -lt 1 ]; then
+  echo "Usage: $0 /path/to/maildir" >&2
+  echo "Example: $0 ~/Mail/ben@bensima.com/Trash" >&2
+  exit 1
+fi
+
+MAILDIR="$1"
+
+# Validate the maildir
+if [ ! -d "$MAILDIR" ]; then
+  echo "Error: Maildir '$MAILDIR' does not exist or is not a directory" >&2
+  exit 1
+fi
+
+# Scratch file for the sorted UID list. The trap removes it on every exit
+# path, including an abort triggered by `set -e`.
+TMP_UID_FILE=$(mktemp)
+trap 'rm -f -- "$TMP_UID_FILE"' EXIT
+
+# Find all UIDs in the maildir. A tag counts when it is followed by ',' or ':'
+# or ends the filename; `sed -n ... p` prints a line only when a UID was
+# extracted, so unparsable paths never reach the list.
+find "$MAILDIR" -type f -name "*,U=*" \
+  | sed -nE 's/.*,U=([0-9]+)([,:].*)?$/\1/p' \
+  | sort > "$TMP_UID_FILE"
+
+# Find duplicated UIDs (the list is sorted, so repeats are adjacent for uniq)
+DUPLICATE_UIDS=$(uniq -d "$TMP_UID_FILE")
+
+if [ -z "$DUPLICATE_UIDS" ]; then
+  echo "No duplicate UIDs found in $MAILDIR"
+  exit 0
+fi
+
+# Report each duplicate UID. Word splitting is safe here: every entry is a run
+# of digits produced by the sed extraction above.
+for UID_NUM in $DUPLICATE_UIDS; do
+  echo "$MAILDIR $UID_NUM"
+done
diff --git a/fix-duplicate-mail-uids.sh b/fix-duplicate-mail-uids.sh
deleted file mode 100755
index 5543fe9..0000000
--- a/fix-duplicate-mail-uids.sh
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/env bash
-#
-# fix-duplicate-uids.sh - Automatically fix duplicate UIDs in maildir
-#
-# this is needed because sometimes iOS mail and mbsync download or move the same
-# file, and i end up with UID collisions. if i just delete the UID from the
-# filename, then mu will generate a new one that doesn't collide.
-
-# Configuration - edit these paths as needed
-MAIL_DIR="$HOME/Mail"
-
-# Color output helpers
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-NC='\033[0m' # No Color
-
-# Create secure temporary files
-TEMP_FILE=$(mktemp)
-trap 'rm -f "$TEMP_FILE"' EXIT
-
-# Process each mailbox separately
-find "$MAIL_DIR" -type d -name cur | while read -r mailbox_cur; do
-  # Get the actual mailbox path (parent of cur)
-  mailbox=$(dirname "$mailbox_cur")
-  mailbox_name=$(basename "$mailbox")
-
-  echo -e "${YELLOW}Checking mailbox: ${mailbox}${NC}"
-
-  # Find duplicate UIDs in this specific mailbox
-  find "$mailbox_cur" -type f -name "*,U=*:*" \
-    | sed -E 's/.*,U=([0-9]+).*/\1/' \
-    | sort \
-    | uniq -c \
-    | awk '$1 > 1 {print $2}' \
-    > "$TEMP_FILE"
-
-  if [ ! -s "$TEMP_FILE" ]; then
-    echo -e " ${GREEN}No duplicate UIDs found.${NC}"
-    continue
-  fi
-
-  echo -e " ${YELLOW}Found $(wc -l < "$TEMP_FILE") duplicate UID(s).${NC}"
-
-  # Process each duplicate UID
-  while read -r uid; do
-    echo -e " ${YELLOW}Fixing duplicate UID: ${uid}${NC}"
-
-    # Find all files with this UID in the current mailbox
-    files=$(find "$mailbox_cur" -type f -name "*,U=${uid}:*" | sort)
-
-    # Find the oldest file with this UID (by creation time)
-    oldest_file=""
-    oldest_time=9999999999
-
-    while read -r file; do
-      # Extract the timestamp from the filename (typical maildir format)
-      timestamp=$(basename "$file" | cut -d. -f1)
-      if [[ $timestamp =~ ^[0-9]+$ ]]; then
-        if [ "$timestamp" -lt "$oldest_time" ]; then
-          oldest_time=$timestamp
-          oldest_file=$file
-        fi
-      else
-        # If we can't extract timestamp from filename, use the file's mtime
-        file_time=$(stat -c %Y "$file")
-        if [ "$file_time" -lt "$oldest_time" ]; then
-          oldest_time=$file_time
-          oldest_file=$file
-        fi
-      fi
-    done <<< "$files"
-
-    echo " Keeping original: $(basename "$oldest_file")"
-
-    # Rename all other files with this UID (removing the UID part)
-    while read -r file; do
-      if [ "$file" != "$oldest_file" ]; then
-        new_file=$(echo "$file" | sed -E 's/(.*),U=[0-9]+:(.*)/\1\2/')
-        echo " Renaming: $(basename "$file") → $(basename "$new_file")"
-        mv "$file" "$new_file"
-      fi
-    done <<< "$files"
-  done < "$TEMP_FILE"
-done
-
-echo -e "${GREEN}Fix complete! Run 'mbsync -a' to resync with new UIDs.${NC}"
diff --git a/fix-mail-duplicates.sh b/fix-mail-duplicates.sh
new file mode 100755
index 0000000..7803649
--- /dev/null
+++ b/fix-mail-duplicates.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+# fix-mail-duplicates.sh
+# Usage: fix-mail-duplicates.sh /path/to/maildir 682
+#
+# Resolves a UID collision inside one maildir. Of all messages whose filename
+# carries the ",U=<UID>" tag, the oldest keeps it; every other message is
+# renamed with the tag removed so it no longer collides.
+
+set -euo pipefail
+
+# Check if arguments were provided
+if [ $# -ne 2 ]; then
+  echo "Usage: $0 /path/to/maildir UID_NUMBER" >&2
+  echo "Example: $0 ~/Mail/ben@bensima.com/Archive 682" >&2
+  exit 1
+fi
+
+MAILDIR="$1"
+UID_NUM="$2"
+
+# Validate the arguments
+if [ ! -d "$MAILDIR" ]; then
+  echo "Error: Maildir '$MAILDIR' does not exist or is not a directory" >&2
+  exit 1
+fi
+
+if ! [[ "$UID_NUM" =~ ^[0-9]+$ ]]; then
+  echo "Error: UID must be a number, got '$UID_NUM'" >&2
+  exit 1
+fi
+
+echo "Looking for duplicate UID $UID_NUM in $MAILDIR..."
+
+# Find all files with the EXACT UID: the tag must be followed by ',' or ':' or
+# end the name, so UID 68 never matches ",U=682". Paths are read NUL-delimited
+# into an array so names containing whitespace survive intact.
+DUPLICATE_FILES=()
+while IFS= read -r -d '' f; do
+  DUPLICATE_FILES+=("$f")
+done < <(find "$MAILDIR" -type f \
+  \( -name "*,U=${UID_NUM}" -o -name "*,U=${UID_NUM}[,:]*" \) -print0)
+FILE_COUNT=${#DUPLICATE_FILES[@]}
+
+if [ "$FILE_COUNT" -le 1 ]; then
+  echo "No duplicates found. Nothing to fix."
+  exit 0
+fi
+
+echo "Found $FILE_COUNT files with the same UID $UID_NUM"
+
+# Print a timestamp (epoch seconds) for one message file: its Date: header when
+# formail is available and the header parses, otherwise the file's mtime.
+message_time() {
+  local file=$1 date_hdr msg_time
+  if command -v formail >/dev/null 2>&1; then
+    # A failing formail/date must fall through to the mtime fallback instead
+    # of aborting the whole script under `set -e`.
+    date_hdr=$(formail -xDate: < "$file" 2>/dev/null) || true
+    if [ -n "$date_hdr" ] && msg_time=$(date -d "$date_hdr" +%s 2>/dev/null); then
+      echo "$msg_time"
+      return 0
+    fi
+  fi
+  stat -c %Y "$file"
+}
+
+# Find the oldest file (we'll keep its UID); on a tie the first one found wins
+OLDEST_FILE=""
+OLDEST_TIME=""
+for f in "${DUPLICATE_FILES[@]}"; do
+  FILE_TIME=$(message_time "$f")
+  if [ -z "$OLDEST_FILE" ] || [ "$FILE_TIME" -lt "$OLDEST_TIME" ]; then
+    OLDEST_TIME="$FILE_TIME"
+    OLDEST_FILE="$f"
+  fi
+done
+
+echo "Keeping original UID on oldest file: $OLDEST_FILE"
+
+# Rename all newer files by removing the UID part
+for f in "${DUPLICATE_FILES[@]}"; do
+  [ "$f" != "$OLDEST_FILE" ] || continue
+  # Rewrite only the basename and drop exactly ",U=<UID>", keeping the
+  # separator and flag suffix (e.g. ":2,S") that follow it.
+  BASE_NAME=${f##*/}
+  NEW_NAME="${f%/*}/$(sed -E "s/,U=${UID_NUM}([,:]|\$)/\1/" <<< "$BASE_NAME")"
+  if [ -e "$NEW_NAME" ]; then
+    echo "Skipping '$f': '$NEW_NAME' already exists" >&2
+    continue
+  fi
+  echo "Renaming newer duplicate:"
+  echo " From: $f"
+  echo " To: $NEW_NAME"
+  mv -- "$f" "$NEW_NAME"
+done
+
+echo "Fixed UID collision for UID $UID_NUM. Please run mbsync again."