/usr/include/podofo/base/PdfContentsTokenizer.h is in libpodofo-dev 0.9.0-1.2+b2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/***************************************************************************
* Copyright (C) 2007 by Dominik Seichter *
* domseichter@web.de *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU Library General Public License as *
* published by the Free Software Foundation; either version 2 of the *
* License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU Library General Public *
* License along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
#ifndef _PDF_CONTENTS_TOKENIZER_H_
#define _PDF_CONTENTS_TOKENIZER_H_
#include "PdfDefines.h"
#include "PdfTokenizer.h"
#include "PdfVariant.h"
#include <list>
namespace PoDoFo {
class PdfDocument;
class PdfCanvas;
class PdfObject;
/** Classifies the kind of item produced when a token is read from a
 *  content stream.
 */
enum EPdfContentsType {
    /** The token is a PDF keyword. */
    ePdfContentsType_Keyword = 0,

    /** The token is a PDF variant. A variant is usually a parameter to a keyword. */
    ePdfContentsType_Variant = 1,

    /** The "token" is raw inline image data found between the ID and EI tags
     *  (see PDF ref section 4.8.6).
     */
    ePdfContentsType_ImageData = 2
};
/** This class is a parser for content streams in PDF documents.
*
* It derives from PdfTokenizer. ReadNext() reports each item it reads as a
* keyword, a variant, or raw inline image data (see EPdfContentsType).
*
* The parsed content stream can be used and modified in various ways.
*
* This class is currently work in progress and subject to change!
*/
class PODOFO_API PdfContentsTokenizer : public PdfTokenizer {
public:
/** Construct a PdfContentsTokenizer from an existing buffer.
* Usually a stream from a PdfPage.
*
* The buffer and its length are forwarded to the PdfTokenizer base
* constructor, and the inline-image-data state flag starts out false.
*
* NOTE(review): whether the buffer is copied or must outlive the tokenizer
* is decided by PdfTokenizer and is not visible here - confirm.
*
* \param pBuffer pointer to a buffer
* \param lLen length of the buffer
*/
PdfContentsTokenizer( const char* pBuffer, long lLen )
: PoDoFo::PdfTokenizer( pBuffer, lLen ), m_readingInlineImgData(false)
{
}
/** Construct a PdfContentsTokenizer from a PdfCanvas
* (i.e. a PdfPage or a PdfXObject).
*
* This is more convenient as you do not have
* to care about buffers yourself.
*
* NOTE(review): this single-argument constructor is not declared explicit,
* so a PdfCanvas* converts implicitly to a PdfContentsTokenizer.
*
* \param pCanvas an object that holds a PDF contents stream
*/
PdfContentsTokenizer( PdfCanvas* pCanvas );
/** Virtual destructor; the body is empty. */
virtual ~PdfContentsTokenizer() { }
/** Read the next keyword or variant, returning true and setting reType if something was read.
* Either rpszKeyword or rVariant, but never both, have defined and usable values on
* true return, with which being controlled by the value of reType.
*
* If EOF is encountered, returns false and leaves reType, rpszKeyword and
* rVariant undefined.
*
* As a special case, reType may be set to ePdfContentsType_ImageData. In
* this case rpszKeyword is undefined, and rVariant contains a PdfData
* variant containing the byte sequence between the ID and EI keywords
* sans the one byte of leading- and trailing- white space. No filter
* decoding is performed.
*
* \param[out] reType will be set to either keyword or variant if true is returned. Undefined
* if false is returned.
*
* \param[out] rpszKeyword if reType is set to ePdfContentsType_Keyword this will point to the keyword,
* otherwise the value is undefined. If set, the value points to memory owned by the
* PdfContentsTokenizer and must not be freed. The value is invalidated when ReadNext
* is next called or when the PdfContentsTokenizer is destroyed.
*
* \param[out] rVariant if reType is set to ePdfContentsType_Variant or ePdfContentsType_ImageData
* this will be set to the read variant, otherwise the value is undefined.
*
* \return true if something was read, false if EOF was encountered
*/
bool ReadNext( EPdfContentsType& reType, const char*& rpszKeyword, PoDoFo::PdfVariant & rVariant );
/** Token-level read declared by this class.
*
* NOTE(review): presumably redeclares PdfTokenizer::GetNextToken for use with
* content streams; the base class and the implementation are not visible
* here - confirm the exact semantics there.
*
* \param pszToken reference to a const char pointer; presumably receives the
* token text - confirm ownership and lifetime in the implementation
* \param peType optional pointer to an EPdfTokenType, defaults to NULL;
* presumably receives the token type when non-NULL - confirm
*
* \return a bool; presumably true when a token was read - confirm
*/
bool GetNextToken( const char *& pszToken, EPdfTokenType* peType = NULL);
private:
/** Set another object's stream as the current stream for parsing
*
* \param pObject use the stream of this object for parsing
*/
void SetCurrentContentsStream( PdfObject* pObject );
/** Private helper taking the same three reference parameters as ReadNext.
*
* NOTE(review): presumably used by ReadNext to produce the
* ePdfContentsType_ImageData result while m_readingInlineImgData is set;
* the definition is not visible here - confirm in the implementation.
*/
bool ReadInlineImgData(EPdfContentsType& reType, const char*& rpszKeyword, PoDoFo::PdfVariant & rVariant);
private:
std::list<PdfObject*> m_lstContents; ///< A list containing pointers to all contents objects
bool m_readingInlineImgData; ///< State flag for reading inline image data; initialised to false by the buffer constructor
};
};
#endif // _PDF_CONTENTS_TOKENIZER_H_
|