This file is indexed.

/usr/share/spamassassin/20_pdfinfo.cf is in spamassassin 3.4.2-0ubuntu0.14.04.1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
# SpamAssassin rules file: Pdfinfo rules
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#
###########################################################################

# 2014-12-02 - axb
# Info and disabled rules kept for historical & documentation reasons
# Updated rules may be added
#
# Original File: pdfinfo.cf
# Original Version: 0.6
# Info: $Id: pdfinfo.cf 895 2007-07-27 10:31:08Z alexb $
# Created: 2007-06-25
# Modified: 2007-07-19
# Original / Defunct Site URL: http://www.rulesemporium.com/plugins.htm#PDFinfo
# Author: Dallas Engelken (aka GMD :-)
# Rules contributed by Alex Broens
# Requires: PDFInfo.pm plugin
# Description: This plugin/ruleset combination will help you alleviate the new
#              PDF based stock spam which began to appear mid-June, 2007.
#
#
# Changes:
#
#   0.6 - added easypdf producer rule and more no body text metas
#       - tags support added, see USING TAGS below.
#   0.5 - added fuzzy test 7
#   0.4 - added new fuzzy for encrypted pdf image spams.
#       - added rule to check for encryption
#   0.3 - added rules based on the new pdf_match_details() function
#       - added additional fuzzy md5 rules
#       - disabled static md5 rules as they are no longer hitting.
#   0.2 - added static md5 to hit full page stock spam.
#   0.1 - initial ruleset.
#

############################################
# USING TAGS
############################################

# The following tags can be used in an add_header line
#
# _PDFCOUNT_    - total number of pdf mime parts in the email
# _PDFIMGCOUNT_ - total number of images found inside pdf mime parts
# _PDFVERSION_  - PDF Version, space separated if there are > 1 pdf attachments
# _PDFNAME_     - Filenames as found in the mime headers of PDF parts
# _PDFPRODUCER_ - Producer/Application that created the PDF(s)
# _PDFAUTHOR_   - Author of the PDF
# _PDFCREATOR_  - Creator/Program that created the PDF(s)
# _PDFTITLE_    - Title of the PDF File, if available
# _PDFIMGDIM_   - If PDF Contains images, the dimensions of them will be put here
# _PDFIMGAREA_  - The total area of all combined images inside the PDF(s)
# _PDFMD5_      - MD5 checksum of PDF(s) - space separated
# _PDFMD5FUZZY1_- Fuzzy1 MD5 checksum of PDF(s) - space separated
# _PDFMD5FUZZY2_- Fuzzy2 MD5 checksum of PDF(s) - space separated
#
# Example add_header lines
#
# add_header all PDF-Info pdf=_PDFCOUNT_, pdfimg=_PDFIMGCOUNT_, ver=_PDFVERSION_, name=_PDFNAME_
# add_header all PDF-Details producer=_PDFPRODUCER_, author=_PDFAUTHOR_, creator=_PDFCREATOR_, title=_PDFTITLE_
# add_header all PDF-ImageInfo dim=_PDFIMGDIM_, area=_PDFIMGAREA_
# add_header all PDF-Md5 md5=_PDFMD5_, fuzzy1=_PDFMD5FUZZY1_, fuzzy2=_PDFMD5FUZZY2_
#

############################################
# GENERIC RULE EXAMPLES SHOWING EVAL USAGE
############################################

# you can match by name
# body          MY_TEST_PDF             eval:pdf_named('mytest.pdf')

# or you can write a regex to match dynamic file names.
# body          MY_TEST_PDF             eval:pdf_name_regex('/^(?:my|your)test\.pdf$/')

# you can make it case insensitive by using modifiers
# body          PDF_IMGXXXXX            eval:pdf_name_regex('/^IMG\D+\.\.PDF$/i')

# you can do exact image size matches
# body          PDF_DEMS_150_400        eval:pdf_image_size_exact(150,400)

# you can do image to text, or image to html ratios
# rawbody       PDF_TO_HTML_RATIO       eval:pdf_image_to_text_ratio(0.000, 0.015)
# body          PDF_TO_TEXT_RATIO       eval:pdf_image_to_text_ratio(0.000, 0.008)

# you can do minimum dimension matches
# body          PDF_SIZE_RANGE_1        eval:pdf_image_size_range(300,300)

# you can do ranged dimension matches
# body          PDF_SIZE_RANGE_2        eval:pdf_image_size_range(200, 300, 250, 350)

# you can count the number of pdf mime parts
# body          PDF_MIME_COUNT_1        eval:pdf_count(1,1)
# body          PDF_MIME_COUNT_2_PLUS   eval:pdf_count(2)

# you can count the number of images inside the pdfs
# body          PDF_IMG_COUNT_1         eval:pdf_image_count(1,1)
# body          PDF_IMG_COUNT_2_PLUS    eval:pdf_image_count(2)

# you can determine pixel coverage
# body          PDF_AREA_SMALL          eval:pdf_pixel_coverage(1,100000)


# match a md5 or fuzzy md5 signature of the pdf

# body          PDF_BAD_MD5             eval:pdf_match_md5('C359F8F89B290DA99DC997ED50117CDF')
# body          PDF_BAD_FUZZY           eval:pdf_match_fuzzy_md5('7340821445D975EEF6F5BDE2EC257900')

# Now you can match against certain details if they are found in the PDF.
# A regex match is used on the value specified, so if you want to do an
# exact match, use anchors  ^value$
#
# body          GMD_AUTHOR_MOBILE       eval:pdf_match_details('author','/^mobile$/')
# body          GMD_PRODUCER_GPL        eval:pdf_match_details('producer','/(?i)^gpl ghostscript/')
# body          GMD_CREATOR_PSCRIPT5    eval:pdf_match_details('creator','/^PScript5/')
# body          GMD_TITLE_WORD_DOC1     eval:pdf_match_details('title','/^Microsoft Word \- Document1$/')
# body          GMD_CREATED_JULY07      eval:pdf_match_details('created','/^200707/')
# body          GMD_MODIFIED_JULY07     eval:pdf_match_details('modified','/^200707/')

# Everything from here down to the closing "endif" at the end of this file is
# wrapped in this ifplugin guard for the PDFInfo plugin, which the header above
# lists under "Requires:". All live rules below use pdf_* eval calls.
ifplugin Mail::SpamAssassin::Plugin::PDFInfo

#######################################
# DISABLED RULES, ENABLE IF YOU WANT
#######################################

# Small area
# Disabled - Hits Ham
# body          GMD_PDF_SMALL_AREA      eval:pdf_pixel_coverage(1,100000)
# describe      GMD_PDF_SMALL_AREA      PDF Area covers 100k pixels or less
# score         GMD_PDF_SMALL_AREA      0.75
# counts        GMD_PDF_SMALL_AREA      51s/15h of 10615 corpus (5652s/4963h AxB) 06/25/07

# NOTE - people do send pdf's without message bodies!
# Disabled - Hits Ham
# body          GMD_PDF_NO_TXT          eval:pdf_image_to_text_ratio(0.000, 0.005)
# describe      GMD_PDF_NO_TXT          Low rawbody to pixel area ratio
# score         GMD_PDF_NO_TXT          0.01
# counts        GMD_PDF_NO_TXT          64s/3h of 10615 corpus (5652s/4963h AxB) 06/25/07

####################################
# HERE ARE THE LIVE RULES
####################################



######################################################################################################
# pdf image dimensions

# Thin horizontal image, a shape common in PDF stock ("stox") spam.
# Going by the describe text, the four eval arguments read as
# (min height, min width, max height, max width): 100-240 high, 450-800 wide.
# Also one of the three inputs to the __GMD_PDF_DIMS meta further down.
body            GMD_PDF_HORIZ           eval:pdf_image_size_range(100, 450, 240, 800)
describe        GMD_PDF_HORIZ           Contains pdf 100-240 (high) x 450-800 (wide)
score           GMD_PDF_HORIZ           0.25
# Historical corpus results (spam hits / ham hits):
# counts        GMD_PDF_HORIZ           135s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07
# counts        GMD_PDF_HORIZ           278s/0h of 34051 corpus (33259s/792h AxB2-TRAPS) 07/13/07

# Small, roughly square image, a shape common in PDF stock ("stox") spam.
# Going by the describe text, the four eval arguments read as
# (min height, min width, max height, max width): 180-360 high, 180-360 wide.
# Also one of the three inputs to the __GMD_PDF_DIMS meta further down.
body            GMD_PDF_SQUARE          eval:pdf_image_size_range(180, 180, 360, 360)
describe        GMD_PDF_SQUARE          Contains pdf 180-360 (high) x 180-360 (wide)
score           GMD_PDF_SQUARE          0.50
# Historical corpus results (spam hits / ham hits):
# counts        GMD_PDF_SQUARE          36s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07
# counts        GMD_PDF_SQUARE          46s/0h of 34051 corpus (33259s/792h AxB2-TRAPS) 07/13/07

# Thin, very tall vertical image, a shape common in PDF stock ("stox") spam.
# Going by the describe text, the four eval arguments read as
# (min height, min width, max height, max width): 450-800 high, 100-240 wide.
# This carries the highest score of the three dimension rules (0.90) and is
# also one of the three inputs to the __GMD_PDF_DIMS meta further down.
body            GMD_PDF_VERT            eval:pdf_image_size_range(450, 100, 800, 240)
describe        GMD_PDF_VERT            Contains pdf 450-800 (high) x 100-240 (wide)
score           GMD_PDF_VERT            0.90
# Historical corpus results (spam hits / ham hits):
# counts        GMD_PDF_VERT            24s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07
# counts        GMD_PDF_VERT            10s/0h of 11773 corpus (10988s/785h AxB2-TRAPS) 07/11/07

######################################################################################################
# static checksums

# all static md5 spam runs are complete as of 7/11
# if there are more, we'll add new rules.

# removed fuzzy rules dated 2007
# Get fuzzy info:
# cat msg.eml | spamassassin --debug pdfinfo  2>&1 | grep fuzzy 2>&1

# sample rules ONLY
# fuzzy checksum for bad stox
#body            GMD_PDF_FUZZY1_T1       eval:pdf_match_fuzzy_md5('57EBC1FFB1A24CC14AE23E1E227C3484')
#describe        GMD_PDF_FUZZY1_T1       Fuzzy MD5 Match 57EBC1FFB1A24CC14AE23E1E227C3484
#score           GMD_PDF_FUZZY1_T1       0.001

# same as rule above using fuzzy md5 of pdf structure
#body            GMD_PDF_FUZZY2_T1       eval:pdf_match_fuzzy_md5('653C8AA9FDFD03D382523488058360A2')
#describe        GMD_PDF_FUZZY2_T1       Fuzzy MD5 Match 653C8AA9FDFD03D382523488058360A2
#score           GMD_PDF_FUZZY2_T1       0.001


######################################################################################################
# pdf_match_details()

# from embedded link spam
#body            GMD_AUTHOR_COLET        eval:pdf_match_details('author','/^colet$/')
#describe        GMD_AUTHOR_COLET        PDF author was 'colet'
#score           GMD_AUTHOR_COLET        4.50
# counts        GMD_AUTHOR_COLET        1s/0h of 10767 corpus (9986s/781h AxB2-TRAPS) 07/11/07
# counts        GMD_AUTHOR_COLET        2s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07

# from full page pdf stock spammer.
#body            GMD_AUTHOR_MOBILE       eval:pdf_match_details('author','/^mobile$/')
#describe        GMD_AUTHOR_MOBILE       PDF author was 'mobile'
#score           GMD_AUTHOR_MOBILE       2.75
# counts        GMD_AUTHOR_MOBILE       2s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07
# counts        GMD_AUTHOR_MOBILE       55s/0h of 10767 corpus (9986s/781h AxB2-TRAPS) 07/11/07

# txt only stock spam
#body            GMD_AUTHOR_OOO          eval:pdf_match_details('author','/^openofficeuser$/')
#describe        GMD_AUTHOR_OOO          PDF author was 'openofficeuser'
#score           GMD_AUTHOR_OOO          1.75
# counts        GMD_AUTHOR_OOO          1s/0h of 10767 corpus (9986s/781h AxB2-TRAPS) 07/11/07
# counts        GMD_AUTHOR_OOO          118s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07

# txt only stock spam
#body            GMD_AUTHOR_HPADMIN      eval:pdf_match_details('author','/^HP_Administrator/')
#describe        GMD_AUTHOR_HPADMIN      PDF author was 'HP_Administrator'
#score           GMD_AUTHOR_HPADMIN      0.25
# counts        GMD_AUTHOR_HPADMIN      105s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07
# counts        GMD_AUTHOR_HPADMIN      27s/0h of 11773 corpus (10988s/785h AxB2-TRAPS) 07/11/07

# Generic rule keyed on the software that produced the PDF.
# The regex is anchored at the start of the 'producer' value and, because of
# the /i modifier, matches case-insensitively. It accepts both
# "gnu ghostscript" and "gpl ghostscript", although the describe text only
# mentions GPL.
# Also one of the three inputs to the __GMD_PDF_PRODUCERS meta further down.
body            GMD_PRODUCER_GPL        eval:pdf_match_details('producer','/^(?:gnu|gpl) ghostscript/i')
describe        GMD_PRODUCER_GPL        PDF producer was GPL Ghostscript
score           GMD_PRODUCER_GPL        0.25
# Historical corpus results (spam hits / ham hits):
# counts        GMD_PRODUCER_GPL        227s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07
# counts        GMD_PRODUCER_GPL        85s/0h of 10767 corpus (9986s/781h AxB2-TRAPS) 07/11/07

# Generic rule keyed on the software that produced the PDF.
# Matches a 'producer' value that starts with "PowerPdf 0." exactly as spelled
# (the regex has no /i modifier, so the match is case-sensitive).
# Also one of the three inputs to the __GMD_PDF_PRODUCERS meta further down.
body            GMD_PRODUCER_POWERPDF   eval:pdf_match_details('producer','/^PowerPdf 0\./')
describe        GMD_PRODUCER_POWERPDF   PDF producer was PowerPDF
score           GMD_PRODUCER_POWERPDF   0.25
# Historical corpus results (spam hits / ham hits); no hits were recorded in
# either corpus:
# counts        GMD_PRODUCER_POWERPDF   0s/0h of 10767 corpus (9986s/781h AxB2-TRAPS) 07/11/07
# counts        GMD_PRODUCER_POWERPDF   0s/0h of 5641 corpus (4064s/1577h AxB-MANUAL) 07/11/07

# PDF was produced by BCL easyPDF: matches a 'producer' value that starts with
# "BCL easyPDF" exactly as spelled (no /i modifier, so case-sensitive).
# Added in ruleset version 0.6 according to the changelog at the top of this
# file. Also one of the three inputs to the __GMD_PDF_PRODUCERS meta below.
body            GMD_PRODUCER_EASYPDF    eval:pdf_match_details('producer','/^BCL easyPDF/')
describe        GMD_PRODUCER_EASYPDF    PDF producer was BCL easyPDF
score           GMD_PRODUCER_EASYPDF    0.25

# Simple check for encryption used inside the attached PDF.
# The author recommends combining it with another rule in a meta; the
# commented-out GMD_PDF_STOX_M3 meta below pairs it with __GMD_PDF_DIMS.
body            GMD_PDF_ENCRYPTED       eval:pdf_is_encrypted()
describe        GMD_PDF_ENCRYPTED       Attached PDF is encrypted
score           GMD_PDF_ENCRYPTED       0.60
# Historical corpus results (spam hits / ham hits):
# counts        GMD_PDF_ENCRYPTED       13s/0h of 34051 corpus (33259s/792h AxB2-TRAPS) 07/13/07

# Simple check for an empty message body when one or more PDF attachments are
# present. The eval is called with no arguments here.
# The corpus results below include 20 ham hits, so this rule is not
# spam-exclusive. The commented-out GMD_PDF_STOX_M4 and GMD_PDF_STOX_M5 metas
# below use it as an alternative to __GMD_PDF_NO_TXT.
body            GMD_PDF_EMPTY_BODY      eval:pdf_is_empty_body()
describe        GMD_PDF_EMPTY_BODY      Attached PDF with empty message body
score           GMD_PDF_EMPTY_BODY      0.25
# Historical corpus results (spam hits / ham hits):
# counts        GMD_PDF_EMPTY_BODY      1638s/20h of 27034 corpus (24636s/2398h AxB-MANUAL) 07/19/07

######################################################################################################
# metas
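# NOTE: of the fuzzy rules referenced in the next line, only GMD_PDF_FUZZY1_T1 and
# GMD_PDF_FUZZY2_T1 still appear in this file (as commented-out samples above).
#meta            __GMD_PDF_CHECKSUM      ( GMD_PDF_FUZZY1_T1 || GMD_PDF_FUZZY2_T1 || GMD_PDF_FUZZY2_T2 || GMD_PDF_FUZZY2_T3 || GMD_PDF_FUZZY2_T4 || GMD_PDF_FUZZY2_T5 || GMD_PDF_FUZZY2_T6 || GMD_PDF_FUZZY2_T7 || GMD_PDF_FUZZY2_T9 || GMD_PDF_FUZZY2_T10 || GMD_PDF_FUZZY2_T11 || GMD_PDF_FUZZY2_T12 )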
#meta            __GMD_PDF_DETAIL        ( GMD_AUTHOR_COLET || GMD_AUTHOR_MOBILE || GMD_AUTHOR_OOO || GMD_AUTHOR_HPADMIN || GMD_PRODUCER_GPL || GMD_PRODUCER_POWERPDF || GMD_PRODUCER_EASYPDF )
# Helper metas; neither has a describe or score line of its own in this file.
# __GMD_PDF_DIMS: true when any of the three image-dimension rules hits.
# In this file it is referenced only by the commented-out GMD_PDF_STOX_M1 and
# GMD_PDF_STOX_M3 metas.
meta            __GMD_PDF_DIMS          ( GMD_PDF_VERT || GMD_PDF_HORIZ || GMD_PDF_SQUARE )
# __GMD_PDF_PRODUCERS: true when any of the three PDF-producer rules hits.
# In this file it is referenced only by the commented-out GMD_PDF_STOX_M5 meta.
meta            __GMD_PDF_PRODUCERS     ( GMD_PRODUCER_GPL || GMD_PRODUCER_POWERPDF || GMD_PRODUCER_EASYPDF )

# This check hits ham when used by itself, so it is kept as a helper rule
# intended only for use inside meta rules. It has no describe or score line.
# It uses the same eval call and arguments as the disabled GMD_PDF_NO_TXT rule
# near the top of this section ("Low rawbody to pixel area ratio").
# In this file it is referenced only by the commented-out GMD_PDF_STOX_M4 and
# GMD_PDF_STOX_M5 metas.
body            __GMD_PDF_NO_TXT        eval:pdf_image_to_text_ratio(0.000, 0.005)

# meta checksum hit with image dimensions
#meta            GMD_PDF_STOX_M1         ( __GMD_PDF_CHECKSUM && __GMD_PDF_DIMS)
#describe        GMD_PDF_STOX_M1         PDF Stox spam
#score           GMD_PDF_STOX_M1         3.25
# counts        GMD_PDF_STOX_M1         159s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07
# counts        GMD_PDF_STOX_M1         40s/0h of 11773 corpus (10988s/785h AxB2-TRAPS) 07/11/07

# meta checksum hit to pdf details
#meta            GMD_PDF_STOX_M2         ( __GMD_PDF_CHECKSUM && __GMD_PDF_DETAIL )
#describe        GMD_PDF_STOX_M2         PDF Stox spam
#score           GMD_PDF_STOX_M2         2.95
# counts        GMD_PDF_STOX_M2         223s/0h of 6132 corpus (4555s/1577h AxB-MANUAL) 07/11/07
# counts        GMD_PDF_STOX_M2         29s/0h of 10767 corpus (9986s/781h AxB2-TRAPS) 07/11/07

# meta dimensions and encryption
#meta            GMD_PDF_STOX_M3         ( __GMD_PDF_DIMS && GMD_PDF_ENCRYPTED )
#describe        GMD_PDF_STOX_M3         PDF Stox spam
#score           GMD_PDF_STOX_M3         2.25
# counts        GMD_PDF_STOX_M3         12s/0h of 34051 corpus (33259s/792h AxB2-TRAPS) 07/13/07

# meta checksum with no text
#meta            GMD_PDF_STOX_M4         ( __GMD_PDF_CHECKSUM && (__GMD_PDF_NO_TXT || GMD_PDF_EMPTY_BODY))
#describe        GMD_PDF_STOX_M4         PDF Stox spam
#score           GMD_PDF_STOX_M4         2.95

# meta no body text along with automated pdf production.
#meta            GMD_PDF_STOX_M5         ( __GMD_PDF_PRODUCERS &&  (__GMD_PDF_NO_TXT || GMD_PDF_EMPTY_BODY))
#describe        GMD_PDF_STOX_M5         PDF Stox Spam
#score           GMD_PDF_STOX_M5         1.00

endif