/usr/lib/ocaml/camlpdf/pdftext.mli is in libcamlpdf-ocaml-dev 2.1.1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
(** Parsing fonts and extracting text from content streams and PDF strings *)
(** {2 Data Types } *)
(** Data carried by the [Type3] constructor of [simple_fonttype].

    NOTE: the spelling [glpyhs] (sic) is part of the public interface and is
    kept as-is so that existing callers continue to compile.

    - [fontbbox]: four floats; presumably the font bounding box (the order of
      the four values is not stated here - TODO confirm).
    - [fontmatrix]: a [Pdftransform.transform_matrix]; presumably the mapping
      from glyph space to text space.
    - [charprocs]: association list pairing a string with a PDF object;
      presumably glyph name to glyph description - TODO confirm.
    - [type3_resources]: a PDF object; presumably the resources used by the
      glyph descriptions. *)
type type3_glpyhs =
{fontbbox : float * float * float * float;
fontmatrix : Pdftransform.transform_matrix;
charprocs : (string * Pdf.pdfobject) list;
type3_resources : Pdf.pdfobject}
(** The kind of a simple (non-composite) font, as stored in the [fonttype]
    field of [simple_font]. Only [Type3] carries extra data (a
    [type3_glpyhs] record); the other constructors are plain tags. *)
type simple_fonttype =
| Type1
| MMType1
| Type3 of type3_glpyhs
| Truetype
(** Font metrics as a flat array of floats. Presumably glyph widths indexed by
    character code - TODO confirm against the implementation. *)
type fontmetrics = float array
(** Which kind of embedded font program a font descriptor refers to. Each
    constructor carries an [int]; presumably the object number of the font
    program stream, with the constructor names mirroring the PDF
    [/FontFile], [/FontFile2] and [/FontFile3] keys - TODO confirm. *)
type fontfile =
| FontFile of int
| FontFile2 of int
| FontFile3 of int
(** A font descriptor: five float-valued metrics plus an optional reference to
    an embedded font program.

    [fontfile] is [None] when no font program is recorded. The units of the
    float fields are not stated in this interface; presumably glyph-space
    units as in the PDF font descriptor dictionary - TODO confirm. *)
type fontdescriptor =
{ascent : float;
descent : float;
leading : float;
avgwidth : float;
maxwidth : float;
fontfile : fontfile option}
(** Overrides applied on top of a base encoding (see [CustomEncoding]): an
    association list of [(string, int)] pairs. Presumably glyph name paired
    with character code - TODO confirm. *)
type differences = (string * int) list
(** A character encoding for a simple or standard font.

    The first five constructors are plain tags. The last two are recursive:
    - [CustomEncoding (base, diffs)] pairs a base encoding with a
      [differences] list.
    - [FillUndefinedWithStandard e] wraps another encoding; judging by the
      name, codes left undefined by [e] fall back to the standard encoding -
      TODO confirm. *)
type encoding =
| ImplicitInFontFile
| StandardEncoding
| MacRomanEncoding
| WinAnsiEncoding
| MacExpertEncoding
| CustomEncoding of encoding * differences
| FillUndefinedWithStandard of encoding
(** A simple font, the payload of the [SimpleFont] constructor of [font].

    - [fonttype]: which kind of simple font this is.
    - [basefont]: the font name as a string.
    - [fontmetrics]: optional metrics array; [None] when absent.
    - [fontdescriptor]: optional font descriptor; [None] when absent.
    - [encoding]: the encoding used by the font. *)
type simple_font =
{fonttype : simple_fonttype;
basefont : string;
fontmetrics : fontmetrics option;
fontdescriptor : fontdescriptor option;
encoding : encoding}
(** The fourteen standard fonts: four faces each of Times, Helvetica and
    Courier, plus Symbol and ZapfDingbats. Convert to and from names with
    [string_of_standard_font] and [standard_font_of_name]. *)
type standard_font =
| TimesRoman
| TimesBold
| TimesItalic
| TimesBoldItalic
| Helvetica
| HelveticaBold
| HelveticaOblique
| HelveticaBoldOblique
| Courier
| CourierBold
| CourierOblique
| CourierBoldOblique
| Symbol
| ZapfDingbats
(** Character collection identification for a CID-keyed font: two strings
    ([registry], [ordering]) and an integer [supplement]. Presumably mirrors
    the PDF CIDSystemInfo dictionary - TODO confirm. *)
type cid_system_info =
{registry : string;
ordering : string;
supplement : int}
(** The descendant CID font of a composite font (second component of
    [CIDKeyedFont]).

    - [cid_system_info]: the character collection record.
    - [cid_basefont]: the font name as a string.
    - [cid_fontdescriptor]: the font descriptor (mandatory here, unlike in
      [simple_font] where it is optional).
    - [cid_widths]: association list of [(int, float)] pairs; presumably CID
      paired with glyph width - TODO confirm.
    - [cid_default_width]: an integer; presumably the width used for CIDs
      not listed in [cid_widths] - TODO confirm. *)
type composite_CIDfont =
{cid_system_info : cid_system_info;
cid_basefont : string;
cid_fontdescriptor : fontdescriptor;
cid_widths : (int * float) list;
cid_default_width : int}
(** How a composite font maps character codes: either a predefined CMap
    identified by a string, or an embedded CMap stream identified by an
    integer reference. *)
type cmap_encoding =
| Predefined of string
| CMap of int (* indirect reference to CMap stream *)
(** The top-level font datatype, as produced by [read_font] and consumed by
    [write_font].

    - [StandardFont (f, enc)]: one of the standard fonts with an encoding.
    - [SimpleFont f]: a simple font described by a [simple_font] record.
    - [CIDKeyedFont (s, cid, cmap)]: a composite font. The string [s] is
      presumably the base font name - TODO confirm. *)
type font =
| StandardFont of standard_font * encoding
| SimpleFont of simple_font
| CIDKeyedFont of string * composite_CIDfont * cmap_encoding
(** {2 String representations of fonts } *)
(** [string_of_standard_font f] returns the name of the standard font [f] as a
    string, such as "Times-Bold" for [Pdftext.TimesBold]. *)
val string_of_standard_font : standard_font -> string
(** [standard_font_of_name name] parses a string such as "/Times-Bold" or
    "/TimesNewRoman,Bold" to [Pdftext.TimesBold] etc.

    The result is an option; presumably [None] is returned when [name] does
    not denote one of the standard fonts - TODO confirm. *)
val standard_font_of_name : string -> standard_font option
(** [string_of_font f] returns a debug string describing the whole font
    datatype [f]. Intended for debugging output; the exact format is not
    specified by this interface. *)
val string_of_font : font -> string
(** {2 Reading a Font} *)
(** [read_font pdf obj] reads a font from the document [pdf] and the font
    object [obj], returning it as a [font] value. *)
val read_font : Pdf.t -> Pdf.pdfobject -> font
(** {2 Writing a Font} *)
(** [write_font pdf f] writes the font [f] to the document [pdf] and returns
    the object number of the main font dictionary. *)
val write_font : Pdf.t -> font -> int
(** {2 Utility functions} *)
(** [codepoints_of_utf8 s] returns the list of unicode codepoints encoded by
    the UTF8 string [s]. The inverse direction is [utf8_of_codepoints]. *)
val codepoints_of_utf8 : string -> int list
(** [utf8_of_codepoints cs] returns the UTF8 string encoding the list of
    unicode codepoints [cs]. The inverse direction is [codepoints_of_utf8]. *)
val utf8_of_codepoints : int list -> string
(** {2 Text from strings outside page content} *)
(** [utf8_of_pdfdocstring s] takes a PDF string [s] (which will be either
    pdfdocencoding or UTF16BE) and returns a string representing the same
    unicode codepoints in UTF8. *)
val utf8_of_pdfdocstring : string -> string
(** [pdfdocstring_of_utf8 s] takes a UTF8 string [s] and converts it to
    pdfdocencoding (if no unicode-only characters are used) or UTF16BE (if
    they are). *)
val pdfdocstring_of_utf8 : string -> string
(** [pdfdocstring_of_codepoints cs] builds a PDF string from the unicode
    codepoints [cs], in pdfdocencoding (if no unicode-only characters are
    used) or UTF16BE (if they are). *)
val pdfdocstring_of_codepoints : int list -> string
(** [codepoints_of_pdfdocstring s] produces the list of unicode codepoints
    represented by [s], a pdfdocencoding or UTF16BE PDF document string. *)
val codepoints_of_pdfdocstring : string -> int list
(** {2 Text from strings inside page content} *)
(** The type of text extractors. It is abstract: values are built with
    [text_extractor_of_font] and consumed by [codepoints_of_text] and
    [glyphnames_of_text]. *)
type text_extractor
(** [text_extractor_of_font pdf fontobj] builds a text extractor from the
    document [pdf] and the font object [fontobj]. *)
val text_extractor_of_font : Pdf.t -> Pdf.pdfobject -> text_extractor
(** [codepoints_of_text extractor s] returns the list of unicode codepoints
    for the string [s] under the given extractor. The string is typically
    the operand of a text-showing operator, for example a [Pdfpages.Op_Tj]
    or [Op_TJ] operator. *)
val codepoints_of_text : text_extractor -> string -> int list
(** [glyphnames_of_text extractor s] returns the list of glyph names for the
    string [s] under the given extractor. *)
val glyphnames_of_text : text_extractor -> string -> string list
(** {2 Building text for strings inside page content} *)
(** [charcode_extractor_of_encoding enc] returns a lookup function for the
    encoding [enc]. Applied to a unicode codepoint, that function returns
    [Some code] with the character code if the codepoint exists in the
    encoding, and [None] otherwise.

    This is only really suitable for simple stuff like standard 14 fonts, or
    editing text in existing fonts. *)
val charcode_extractor_of_encoding : encoding -> (int -> int option)
|