This file is indexed.

/usr/share/mu/scripts/find-dups.scm is in maildir-utils 0.9.18-2build3.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
#!/bin/sh
# Re-execute this file under guile with `main' as the entry point.
# "$0" and "$@" are quoted so that a script path or arguments containing
# whitespace or glob characters reach guile unchanged.
exec guile -e main -s "$0" "$@"
!#
;;
;; Copyright (C) 2013-2015 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
;;
;; This program is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by the
;; Free Software Foundation; either version 3, or (at your option) any
;; later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;

;; You should have received a copy of the GNU General Public License
;; along with this program; if not, write to the Free Software Foundation,
;; Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

;; INFO: find duplicate messages
;; INFO: options:
;; INFO:   --muhome=<muhome>: path to mu home dir
;; INFO:   --delete: delete all but the first one (experimental, be careful!)
;; INFO:   --expr=<expr>: expression to constrain the search

(use-modules (mu) (mu script) (mu stats))
(use-modules (ice-9 getopt-long) (ice-9 optargs)
  (ice-9 popen) (ice-9 format) (ice-9 rdelim))

(define (md5sum path)
  "Return the MD5 checksum of the file at PATH as a string, computed by
running the external `md5sum' program.  Return #f when no checksum could
be obtained: the program produced no output, its output did not contain
a checksum followed by a space, or it exited with a non-zero status."
  ;; "--" ends option parsing, so a PATH starting with "-" is still
  ;; treated as a file name.
  (let* ((port (open-pipe* OPEN_READ "md5sum" "--" path))
         ;; Read the whole output line before closing the pipe, so the
         ;; child has finished writing and its exit status is meaningful.
         (line (read-line port))
         (status (close-pipe port)))
    (and (string? line)                       ; not the EOF object
         (eqv? 0 (status:exit-val status))    ; md5sum reported success
         (let ((end (string-index line #\space)))
           ;; the checksum is everything before the first space
           (and end
                (> end 0)
                (substring line 0 end))))))
 
(define (find-dups delete expr)
  "Print groups of duplicate message files to standard output.
EXPR is handed unchanged to `mu:for-each-message' to select the messages
to consider.  Messages are first grouped by message-id and size; within
each group, files are only treated as duplicates when their md5sums are
equal too.  For every set of duplicates the first path is printed as-is
and each further path is printed prefixed with \"dup: \" or, when DELETE
is true, prefixed with \"deleting: \" and then removed from disk."
  (let ((id-table (make-hash-table 20000)))
    ;; Pass 1: fill the table with <msgid-size> => <list of paths>.
    (mu:for-each-message
      (lambda (msg)
        (let ((id (format #f "~a-~d" (mu:message-id msg) (mu:size msg))))
          (hash-set! id-table id
                     (cons (mu:path msg) (hash-ref id-table id '())))))
      expr)
    ;; Pass 2: for every key with more than one path, check the md5sum to
    ;; make 100%-minus-ε sure they are really the same file.
    (hash-for-each
      (lambda (id paths)
        (when (> (length paths) 1)
          (let ((by-md5 (make-hash-table 10)))
            (for-each
              (lambda (path)
                (when (file-exists? path)
                  (let ((md5 (md5sum path)))
                    ;; md5sum hands back a non-string (e.g. the EOF object
                    ;; or #f) when no checksum could be read.  Such files
                    ;; must not share a key: they would look like
                    ;; duplicates of each other and could get deleted.
                    (when (and (string? md5) (not (string-null? md5)))
                      (hash-set! by-md5 md5
                                 (cons path (hash-ref by-md5 md5 '())))))))
              paths)
            ;; by-md5 now maps each md5sum to the files having it.
            (hash-for-each
              (lambda (md5 mpaths)
                (when (> (length mpaths) 1)
                  ;; the first file of the set is the one that is kept
                  (format #t "~a\n" (car mpaths))
                  (for-each
                    (lambda (path)
                      (format #t "~a: ~a\n" (if delete "deleting" "dup") path)
                      (when delete
                        ;; A file that cannot be removed is reported on
                        ;; stderr; it must not abort the remaining work.
                        (catch 'system-error
                          (lambda () (delete-file path))
                          (lambda err
                            (format (current-error-port)
                                    "failed to delete ~a: ~a\n"
                                    path
                                    (strerror (system-error-errno err)))))))
                    (cdr mpaths))))
              by-md5))))
      id-table)))



(define (main args)
  "Find duplicate messages and, potentially, delete the dups.
   Be careful with that!
Interpret argument-list ARGS (like command-line
arguments). Possible arguments are:
  --muhome (path to alternative mu home directory).
  --delete (delete all but the first one). Run mu index afterwards.
  --expr   (expression to constrain search).
  --help, -h (print a usage summary and return without scanning)."
  (setlocale LC_ALL "")
  (let* ((optionspec '((muhome (value #t))
                       (delete (value #f))
                       (expr   (value #t))
                       (help   (single-char #\h) (value #f))))
         (options (getopt-long args optionspec))
         (help    (option-ref options 'help #f))
         (delete  (option-ref options 'delete #f))
         ;; expr is #t when --expr is not given; it is passed on as-is
         ;; (presumably mu then considers all messages -- TODO confirm).
         (expr    (option-ref options 'expr #t))
         (muhome  (option-ref options 'muhome #f)))
    (if help
      ;; Only describe the options; in particular never scan or delete
      ;; anything when help was requested.
      (display "usage: find-dups.scm [--muhome=<muhome>] [--expr=<expr>] [--delete]
  --muhome=<muhome>  path to alternative mu home directory
  --expr=<expr>      expression to constrain search
  --delete           delete all but the first one (experimental, be careful!)
  -h, --help         show this help
")
      (begin
        (mu:initialize muhome)
        (find-dups delete expr)))))


;; Local Variables:
;; mode: scheme
;; End: