/usr/lib/python2.7/dist-packages/nltk/wsd.py is in python-nltk 3.2.1-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# Natural Language Toolkit: Word Sense Disambiguation Algorithms
#
# Authors: Liling Tan <alvations@gmail.com>,
# Dmitrijs Milajevs <dimazest@gmail.com>
#
# Copyright (C) 2001-2016 NLTK Project
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
from nltk.corpus import wordnet
def lesk(context_sentence, ambiguous_word, pos=None, synsets=None):
    """Return a synset for an ambiguous word in a context.

    Each candidate synset is scored by the number of distinct context words
    that also appear as whitespace-separated tokens of its definition; the
    candidate with the highest score is returned.

    :param iter context_sentence: The context sentence where the ambiguous word
        occurs, passed as an iterable of words.
    :param str ambiguous_word: The ambiguous word that requires WSD.
    :param str pos: A specified Part-of-Speech (POS). When given, only
        candidates whose ``pos()`` equals it are considered.
    :param iter synsets: Possible synsets of the ambiguous word. When omitted,
        they are looked up with ``wordnet.synsets(ambiguous_word)``. Any
        iterable is accepted, including one-shot iterators.
    :return: ``lesk_sense`` The Synset() object with the highest signature
        overlaps, or ``None`` if there are no candidate synsets left after
        filtering.

    This function is an implementation of the original Lesk algorithm (1986) [1].

    Usage example::

        >>> lesk(['I', 'went', 'to', 'the', 'bank', 'to', 'deposit', 'money', '.'], 'bank', 'n')
        Synset('savings_bank.n.02')

    [1] Lesk, Michael. "Automatic sense disambiguation using machine
    readable dictionaries: how to tell a pine cone from an ice cream
    cone." Proceedings of the 5th Annual International Conference on
    Systems Documentation. ACM, 1986.
    http://dl.acm.org/citation.cfm?id=318728
    """
    # A set makes the overlap computation ignore repeated context words.
    context = set(context_sentence)

    if synsets is None:
        synsets = wordnet.synsets(ambiguous_word)

    if pos:
        synsets = [ss for ss in synsets if str(ss.pos()) == pos]
    else:
        # Materialize the candidates so the emptiness check below is reliable:
        # an exhausted generator/iterator is truthy and would otherwise reach
        # max() and raise ValueError instead of returning None.
        synsets = list(synsets)

    if not synsets:
        return None

    # Rank by (overlap, synset) tuples: the overlap count decides, and equal
    # counts fall through to comparing the synset objects themselves.
    _, sense = max(
        (len(context.intersection(ss.definition().split())), ss) for ss in synsets
    )
    return sense
|