/usr/share/pyshared/smart/util/distance.py is in python-smartpm 1.4-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 | #
# Copyright (c) 2005 Conectiva, Inc.
#
# Written by Gustavo Niemeyer <niemeyer@conectiva.com>
#
# This file is part of Smart Package Manager.
#
# Smart Package Manager is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as published
# by the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
#
# Smart Package Manager is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Smart Package Manager; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
def distance(a, b, cutoff=None):
"""
Compute Levenhstein distance - http://www.merriampark.com/ld.htm
"""
if a == b:
return 0, 1.0
al = len(a)
bl = len(b)
if al > bl:
a, al, b, bl = b, bl, a, al
if cutoff and type(cutoff) is float:
cutoff = int(bl-cutoff*bl)
lst = range(1,bl+1)
for ai in range(al):
last, lst[0] = lst[0], min(lst[0]+1, ai+(b[0] != a[ai]))
for bi in range(1, bl):
last, lst[bi] = lst[bi], min(lst[bi-1]+1, lst[bi]+1,
last+(b[bi] != b[ai]))
if cutoff is not None and min(lst) > cutoff:
return bl, 0.0
res = lst[-1]
if cutoff is not None and res > cutoff:
return bl, 0.0
return res, float(bl-res)/bl
def globdistance(a, b, cutoff=None, ignorecase=False):
"""
Compute Levenhstein distance - http://www.merriampark.com/ld.htm
Algorithm changed by Gustavo Niemeyer to implement wildcards support.
"""
if ignorecase:
a = a.lower()
b = b.lower()
if a == b:
return 0, 1.0
wildstart = False
if a.startswith("*"):
wildstart = True
a = a.lstrip("*")
al = len(a)
bl = len(b)
if bl == 0:
return al, 0.0
maxl = al > bl and al or bl
if cutoff and type(cutoff) is float:
cutoff = int(maxl-cutoff*maxl)
if wildstart:
lst = [0]*bl
else:
lst = range(1,bl+1)
for ai in range(al):
if a[ai] == "*":
last, lst[0] = lst[0], min(lst[0], ai)
for bi in range(1,bl):
last, lst[bi] = lst[bi], min(lst[bi-1], lst[bi], last)
elif a[ai] == "?":
last, lst[0] = lst[0], min(lst[0]+1, ai)
for bi in range(1,bl):
last, lst[bi] = lst[bi], min(lst[bi-1]+1, lst[bi]+1, last)
else:
last, lst[0] = lst[0], min(lst[0]+1, ai+(b[0] != a[ai]))
for bi in range(1, bl):
last, lst[bi] = lst[bi], min(lst[bi-1]+1, lst[bi]+1,
last+(b[bi] != a[ai]))
if cutoff is not None and min(lst) > cutoff:
return bl, 0.0
res = lst[-1]
if cutoff is not None and res > cutoff:
return bl, 0.0
return res, float(maxl-res)/maxl
from cdistance import *
|