This file is indexed.

/usr/share/pyshared/logsparser/extras/robots.py is in python-logsparser 0.4-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

(The viewer's line-number gutter, originally rendered here as a column of bare numbers 1–105, has been removed; the file contents follow below.)
# -*- coding: utf-8 -*-

# -*- python -*-

# pylogsparser - Logs parsers python library
#
# Copyright (C) 2011 Wallix Inc.
#
# This library is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation; either version 2.1 of the License, or (at your
# option) any later version.
#
# This library is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this library; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#

"""In this module we define a regular expression used to fetch the most common
robots."""

import re

# taken from genrobotlist.pl in the awstats project : http://awstats.cvs.sourceforge.net
# Lowercased substrings of User-Agent strings that identify well-known
# web robots / crawlers.
robots = [
    'antibot',
    'appie',
    'architext',
    'bingbot',
    'bjaaland',
    'digout4u',
    'echo',
    'fast-webcrawler',
    'ferret',
    'googlebot',
    'gulliver',
    'harvest',
    'htdig',
    'ia_archiver',
    'askjeeves',
    'jennybot',
    'linkwalker',
    'lycos',
    'mercator',
    'moget',
    'muscatferret',
    'myweb',
    'netcraft',
    'nomad',
    'petersnews',
    'scooter',
    'slurp',
    'unlost_web_crawler',
    'voila',
    'voyager',
    'webbase',
    'weblayers',
    'wisenutbot',
    'aport',
    'awbot',
    'baiduspider',
    'bobby',
    'boris',
    'bumblebee',
    'cscrawler',
    'daviesbot',
    'exactseek',
    'ezresult',
    'gigabot',
    'gnodspider',
    'grub',
    'henrythemiragorobot',
    'holmes',
    'internetseer',
    'justview',
    'linkbot',
    'metager-linkchecker',
    'linkchecker',
    'microsoft_url_control',
    'msiecrawler',
    'nagios',
    'perman',
    'pompos',
    'rambler',
    'redalert',
    'shoutcast',
    'slysearch',
    'surveybot',
    'turnitinbot',
    'turtlescanner',
    'turtle',
    'ultraseek',
    'webclipping.com',
    'webcompass',
    'yahoo-verticalcrawler',
    'yandex',
    'zealbot',
    'zyborg',
]
# Escape each name before joining: the names are meant as literal
# substrings, but e.g. the "." in "webclipping.com" would otherwise be a
# regex metacharacter matching ANY character ("webclippingXcom").
# Case-insensitive because User-Agent strings vary in capitalization.
robot_regex = re.compile("|".join(re.escape(robot) for robot in robots),
                         re.IGNORECASE)