/usr/lib/picard/picard/similarity.py is in picard 1.2-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# -*- coding: utf-8 -*-
#
# Picard, the next-generation MusicBrainz tagger
# Copyright (C) 2006 Lukáš Lalinský
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import re
from picard.util import strip_non_alnum
from picard.util.astrcmp import astrcmp
# Maps the lowercase phrases "disc 1" .. "disc 8" to the short forms
# "CD1" .. "CD8". Not referenced by the functions visible in this module.
_replace_words = dict(
    ("disc %d" % number, "CD%d" % number) for number in range(1, 9)
)
def normalize(orig_string):
    """Return a simplified form of ``orig_string`` for comparison.

    The string is lowercased and passed through ``strip_non_alnum``
    (presumably removing non-alphanumeric characters; the helper lives in
    ``picard.util``). If that leaves nothing, the original string is
    returned untouched (not lowercased) so the result is never blanked out
    by the stripping step.
    """
    stripped = strip_non_alnum(orig_string.lower())
    return stripped if stripped else orig_string
def similarity(a1, b1):
    """Return the ``astrcmp`` score of the normalized forms of two strings.

    ``a1`` is normalized first. ``b1`` is only normalized when the
    normalized ``a1`` is non-empty; otherwise an empty string is compared
    against it instead.
    """
    left = normalize(a1)
    # Skip normalizing the second string when the first one is empty.
    right = normalize(b1) if left else ""
    return astrcmp(left, right)
# Splits text on runs of non-word characters. A raw string is used so that
# ``\W`` reaches the regex engine instead of being parsed as a string escape.
_split_words_re = re.compile(r'\W+', re.UNICODE)


def similarity2(a, b):
    """Calculates similarity of multi-word strings.

    Both strings are lowercased and split into words on runs of non-word
    characters (empty fragments are dropped). Every word of the shorter
    word list is compared with ``astrcmp`` against all remaining words of
    the longer list and its best score is added to the running score. A
    best match scoring above 0.6 is removed from the longer list so it
    cannot be matched again. Each word left over in the longer list adds
    0.4 to the denominator.

    Returns ``score / total``, or ``0`` when neither string contains any
    words.
    """
    # List comprehensions (rather than ``filter``) guarantee real lists,
    # which the ``len()`` calls and the ``del`` below require.
    shorter = [word for word in _split_words_re.split(a.lower()) if word]
    longer = [word for word in _split_words_re.split(b.lower()) if word]
    if len(shorter) > len(longer):
        shorter, longer = longer, shorter

    total = 0
    score = 0.0
    for word in shorter:
        best_score = 0.0
        best_position = None
        for position, candidate in enumerate(longer):
            current = astrcmp(word, candidate)
            if current > best_score:
                best_score = current
                best_position = position
        if best_position is not None:
            score += best_score
            # Only a sufficiently good match consumes the candidate word.
            if best_score > 0.6:
                del longer[best_position]
        total += 1

    # Unmatched words of the longer list weigh in at 0.4 each.
    total += len(longer) * 0.4
    if total:
        return score / total
    return 0
|