/usr/lib/python3/dist-packages/slugify/slugify.py is in python3-slugify 1.2.4-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | import re
import unicodedata
import types
import sys
try:
from htmlentitydefs import name2codepoint
_unicode = unicode
_unicode_type = types.UnicodeType
except ImportError:
from html.entities import name2codepoint
_unicode = str
_unicode_type = str
unichr = chr
import unidecode
__all__ = ['slugify', 'smart_truncate']
# Pre-compiled patterns used by slugify().  All regex literals are raw strings
# so that sequences such as ``\d`` reach the regex engine verbatim instead of
# being treated as (invalid) Python string escapes.

# Named character entity references, e.g. ``&amp;``; group 1 is the entity name.
CHAR_ENTITY_PATTERN = re.compile(r'&(%s);' % '|'.join(name2codepoint))
# Decimal numeric character references, e.g. ``&#65;``; group 1 is the number.
DECIMAL_PATTERN = re.compile(r'&#(\d+);')
# Hexadecimal numeric character references, e.g. ``&#x41;``; group 1 is the hex digits.
HEX_PATTERN = re.compile(r'&#x([\da-fA-F]+);')
# One or more consecutive single quotes.
QUOTE_PATTERN = re.compile(r'[\']+')
# Runs of characters that are NOT a dash, a lowercase ASCII letter or a digit.
ALLOWED_CHARS_PATTERN = re.compile(r'[^-a-z0-9]+')
# Two or more consecutive dashes.
DUPLICATE_DASH_PATTERN = re.compile(r'-{2,}')
# A comma sitting directly between two digits (thousands separator).
NUMBERS_PATTERN = re.compile(r'(?<=\d),(?=\d)')

# Separator used internally while building the slug.
DEFAULT_SEPARATOR = '-'
def smart_truncate(string, max_length=0, word_boundaries=False, separator=' ', save_order=False):
    """
    Shorten a string to at most ``max_length`` characters.

    Leading and trailing separator characters are always stripped, even when
    no truncation takes place.

    :param string (str): text to shorten
    :param max_length (int): maximum length of the result; a falsy value disables truncation
    :param word_boundaries (bool): if True, build the result from whole words only
    :param separator (str): delimiter between words
    :param save_order (bool): if True, stop at the first word that does not fit instead of
        skipping it and trying later (shorter) words
    :return (str): the shortened string
    """
    text = string.strip(separator)

    # No limit requested, or the text is already short enough.
    if not max_length or len(text) < max_length:
        return text

    # Plain cut, ignoring word boundaries.
    if not word_boundaries:
        return text[:max_length].strip(separator)

    # A single word cannot be split on a boundary: cut it hard.
    if separator not in text:
        return text[:max_length]

    pieces = []
    used = 0  # length of everything collected so far, trailing separators included
    for token in text.split(separator):
        if not token:
            continue
        candidate = used + len(token)
        if candidate == max_length:
            # Exact fit: take the word and stop.
            pieces.append(token)
            break
        if candidate < max_length:
            pieces.append(token + separator)
            used = candidate + len(separator)
        elif save_order:
            # Word does not fit and later words may not jump the queue.
            break

    # Fall back to a hard cut when not even one word fitted.
    shortened = ''.join(pieces) or text[:max_length]  # pragma: no cover
    return shortened.strip(separator)
def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False,
            separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None):
    """
    Make a slug from the given text.

    :param text (str): initial text; non-unicode input is decoded as UTF-8, ignoring errors
    :param entities (bool): replace named character entities (``&amp;``) with their character
    :param decimal (bool): replace decimal character references (``&#65;``) with their character
    :param hexadecimal (bool): replace hexadecimal character references (``&#x41;``) with their
        character
    :param max_length (int): output string length; truncation only happens when this is > 0
    :param word_boundary (bool): passed to smart_truncate() as ``word_boundaries``
    :param save_order (bool): if parameter is True and max_length > 0 return whole words in the
        initial order
    :param separator (str): separator between words in the returned slug
    :param stopwords (iterable): words to discount; they are lowercased before comparison
    :param regex_pattern (str): regex pattern matching the characters to replace with a dash;
        defaults to ALLOWED_CHARS_PATTERN (everything except ``-``, ``a-z`` and ``0-9``)
    :return (str): the slug
    """
    # ensure text is unicode
    if not isinstance(text, _unicode_type):
        text = _unicode(text, 'utf-8', 'ignore')

    # replace quotes with dashes - pre-process
    text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text)

    # decode unicode
    text = unidecode.unidecode(text)

    # ensure text is still in unicode
    if not isinstance(text, _unicode_type):
        text = _unicode(text, 'utf-8', 'ignore')

    # character entity reference
    if entities:
        text = CHAR_ENTITY_PATTERN.sub(lambda m: unichr(name2codepoint[m.group(1)]), text)

    # decimal character reference
    if decimal:
        try:
            text = DECIMAL_PATTERN.sub(lambda m: unichr(int(m.group(1))), text)
        except (ValueError, OverflowError):
            # A reference names a code point that cannot be converted to a
            # character; best effort: leave all decimal references untouched.
            pass

    # hexadecimal character reference
    if hexadecimal:
        try:
            text = HEX_PATTERN.sub(lambda m: unichr(int(m.group(1), 16)), text)
        except (ValueError, OverflowError):
            # Same best-effort policy as for decimal references.
            pass

    # translate
    text = unicodedata.normalize('NFKD', text)
    if sys.version_info < (3,):
        text = text.encode('ascii', 'ignore')

    # make the text lowercase
    text = text.lower()

    # remove generated quotes -- post-process
    text = QUOTE_PATTERN.sub('', text)

    # cleanup numbers: drop commas between digits so "1,000" becomes "1000"
    text = NUMBERS_PATTERN.sub('', text)

    # replace all other unwanted characters
    pattern = regex_pattern or ALLOWED_CHARS_PATTERN
    text = re.sub(pattern, DEFAULT_SEPARATOR, text)

    # remove redundant dashes (runs collapse to one; leading/trailing ones go)
    text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)

    # remove stopwords
    if stopwords:
        # a set gives constant-time membership tests for each word
        stopwords_lower = set(s.lower() for s in stopwords)
        words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords_lower]
        text = DEFAULT_SEPARATOR.join(words)

    # smart truncate if requested
    if max_length > 0:
        text = smart_truncate(text, max_length, word_boundary, DEFAULT_SEPARATOR, save_order)

    # the slug is built with dashes; swap in the caller's separator last
    if separator != DEFAULT_SEPARATOR:
        text = text.replace(DEFAULT_SEPARATOR, separator)

    return text
def main():  # pragma: no cover
    """
    Command-line entry point.

    Joins all command-line arguments with single spaces, slugifies the result
    and prints it.  Prints a usage line instead when no argument is given.
    """
    words = sys.argv[1:]
    if not words:
        print("Usage %s TEXT TO SLUGIFY" % sys.argv[0])
        return

    print(slugify(' '.join(words)))
|