This file is indexed.

/usr/include/dspam/tokenizer.h is in libdspam7-dev 3.10.1+dfsg-4ubuntu1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
/* $Id: tokenizer.h,v 1.10 2011/06/28 00:13:48 sbajic Exp $ */

/*
 DSPAM
 COPYRIGHT (C) 2002-2011 DSPAM PROJECT

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.

*/

#ifndef _TOKENIZER_H
#  define _TOKENIZER_H

#include "diction.h"
#include "nodetree.h"
#include "error.h"
#include "storage_driver.h"
#include "decode.h"

#define SPARSE_WINDOW_SIZE       5

int _ds_tokenize(
  DSPAM_CTX * CTX,
   char *headers,
   char *body,
   ds_diction_t diction);

int _ds_tokenize_sparse(
  DSPAM_CTX * CTX, 
  char *headers, 
  char *body, 
  ds_diction_t diction);

int _ds_tokenize_ngram(
  DSPAM_CTX * CTX,
  char *headers,
  char *body,
  ds_diction_t diction);

/* _ds_process: ngram token generation routines */

int _ds_process_header_token(
  DSPAM_CTX * CTX,
  char *joined_token,
  const char *previous_token,
  ds_diction_t diction,
  const char *heading);

int _ds_process_body_token(
  DSPAM_CTX * CTX,
  char *joined_token,
  const char *previous_token,
  ds_diction_t diction); 

/* _ds_map: sparse token generation routines */

int _ds_map_header_token(
  DSPAM_CTX * CTX,
  char *token,
  char **previous_tokens,
  ds_diction_t diction,
  const char *heading,
  const char *bitpattern);

int _ds_map_body_token(
  DSPAM_CTX * CTX,
  char *token,
  char **previous_tokens,
  ds_diction_t diction,
  const char *bitpattern);

int _ds_degenerate_message(
  DSPAM_CTX *CTX,
  buffer *header,
  buffer *body);

int _ds_url_tokenize(
  ds_diction_t diction,
  char *body,
  const char *key);

void _ds_sparse_clear
  (char **previous_tokens);

char * _ds_truncate_token
  (const char *token);

char *_ds_generate_bitpattern
  (int breadth);

#endif