/usr/share/pyshared/logsparser/extras/robots.py is in python-logsparser 0.4-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# -*- coding: utf-8 -*-
# -*- python -*-
# pylogsparser - Logs parsers python library
#
# Copyright (C) 2011 Wallix Inc.
#
# This library is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation; either version 2.1 of the License, or (at your
# option) any later version.
#
# This library is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this library; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
"""In this module we define a regular expression used to fetch the most common
robots."""
import re
# taken from genrobotlist.pl in the awstats project: http://awstats.cvs.sourceforge.net
robots = [
'antibot',
'appie',
'architext',
'bingbot',
'bjaaland',
'digout4u',
'echo',
'fast-webcrawler',
'ferret',
'googlebot',
'gulliver',
'harvest',
'htdig',
'ia_archiver',
'askjeeves',
'jennybot',
'linkwalker',
'lycos',
'mercator',
'moget',
'muscatferret',
'myweb',
'netcraft',
'nomad',
'petersnews',
'scooter',
'slurp',
'unlost_web_crawler',
'voila',
'voyager',
'webbase',
'weblayers',
'wisenutbot',
'aport',
'awbot',
'baiduspider',
'bobby',
'boris',
'bumblebee',
'cscrawler',
'daviesbot',
'exactseek',
'ezresult',
'gigabot',
'gnodspider',
'grub',
'henrythemiragorobot',
'holmes',
'internetseer',
'justview',
'linkbot',
'metager-linkchecker',
'linkchecker',
'microsoft_url_control',
'msiecrawler',
'nagios',
'perman',
'pompos',
'rambler',
'redalert',
'shoutcast',
'slysearch',
'surveybot',
'turnitinbot',
'turtlescanner',
'turtle',
'ultraseek',
'webclipping.com',
'webcompass',
'yahoo-verticalcrawler',
'yandex',
'zealbot',
'zyborg',
]
robot_regex = re.compile("|".join(robots), re.IGNORECASE)
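
For reference, a minimal usage sketch (not part of the packaged file, and assuming the import path implied by the package layout above): the compiled robot_regex matches whenever any name from the list appears anywhere in a string, so a typical use is flagging crawler User-Agent values. The sample User-Agent strings below are purely illustrative.

from logsparser.extras.robots import robot_regex

user_agents = [
    "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Firefox/89.0",
]

for ua in user_agents:
    # search() scans the whole string; the pattern is compiled with re.IGNORECASE.
    if robot_regex.search(ua):
        print("robot:", ua)
    else:
        print("human:", ua)

Because the pattern is a plain alternation of substrings, search() rather than match() is what catches a robot name embedded in the middle of a User-Agent string.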