# /usr/share/pyshared/matplotlib/afm.py

"""
This is a python interface to Adobe Font Metrics Files.  Although a
number of other python implementations exist (and may be more complete
than mine), I decided not to go with them because they either

  1) were copyrighted or used a non-BSD compatible license

  2) had too many dependencies and I wanted a free standing lib

  3) did more than I needed and it was easier to write my own than
     figure out how to just get what I needed from theirs

It is pretty easy to use, and requires only built-in python libs::

    >>> from afm import AFM
    >>> fh = file('ptmr8a.afm')
    >>> afm = AFM(fh)
    >>> afm.string_width_height('What the heck?')
    (6220.0, 683)
    >>> afm.get_fontname()
    'Times-Roman'
    >>> afm.get_kern_dist('A', 'f')
    0
    >>> afm.get_kern_dist('A', 'y')
    -92.0
    >>> afm.get_bbox_char('!')
    [130, -9, 238, 676]
    >>> afm.get_bbox_font()
    [-168, -218, 1000, 898]


AUTHOR:
  John D. Hunter <jdh2358@gmail.com>
"""

import sys, os, re
from _mathtext_data import uni2type1

# Convert a string to a python type

# some afm files have floats where we are expecting ints -- there is
# probably a better way to handle this (support floats, round rather
# than truncate).  But I don't know what the best approach is now and
# this change to _to_int should at least prevent mpl from crashing on
# these JDH (2009-11-06)

def _to_int(x):
    return int(float(x))

# The float and string converters are plain aliases of the built-in
# constructors; they exist so every header field goes through a
# uniformly named _to_* converter.
_to_float = float
_to_str =  str

def _to_list_of_ints(s):
    s = s.replace(',', ' ')
    return [_to_int(val) for val in s.split()]

def _to_list_of_floats(s):
    return [_to_float(val) for val in s.split()]
def _to_bool(s):
    if s.lower().strip() in ('false', '0', 'no'): return False
    else: return True


def _sanity_check(fh):
    """
    Check if the file at least looks like AFM.
    If not, raise :exc:`RuntimeError`.
    """

    # Remember the file position in case the caller wants to
    # do something else with the file.
    pos = fh.tell()
    try:
        line = fh.readline()
    finally:
        fh.seek(pos, 0)

    # AFM spec, Section 4: The StartFontMetrics keyword [followed by a
    # version number] must be the first line in the file, and the
    # EndFontMetrics keyword must be the last non-empty line in the
    # file. We just check the first line.
    if not line.startswith('StartFontMetrics'):
        raise RuntimeError('Not an AFM file')

def _parse_header(fh):
    """
    Reads the font metrics header (up to the char metrics) and returns
    a dictionary mapping *key* to *val*.  *val* will be converted to the
    appropriate python type as necessary; eg:

        * 'False'->False
        * '0'->0
        * '-168 -218 1000 898'-> [-168, -218, 1000, 898]

    Recognised dictionary keys are

      StartFontMetrics, FontName, FullName, FamilyName, Weight,
      ItalicAngle, IsFixedPitch, FontBBox, UnderlinePosition,
      UnderlineThickness, Version, Notice, EncodingScheme, CapHeight,
      Capheight, XHeight, Ascender, Descender, StdHW, StdVW,
      StartCharMetrics, CharacterSet, Characters

    Blank lines and ``Comment`` lines are skipped.  A line with an
    unrecognised keyword, or with a value its converter rejects, is
    reported on stderr and otherwise ignored.

    Parsing stops, and the dictionary is returned, right after the
    ``StartCharMetrics`` line has been read.  If the input ends before
    that line is seen, :exc:`RuntimeError` is raised.
    """
    headerConverters = {
        'StartFontMetrics': _to_float,
        'FontName': _to_str,
        'FullName': _to_str,
        'FamilyName': _to_str,
        'Weight': _to_str,
        'ItalicAngle': _to_float,
        'IsFixedPitch': _to_bool,
        'FontBBox': _to_list_of_ints,
        'UnderlinePosition': _to_int,
        'UnderlineThickness': _to_int,
        'Version': _to_str,
        'Notice': _to_str,
        'EncodingScheme': _to_str,
        'CapHeight': _to_float, # Is the second version a mistake, or
        'Capheight': _to_float, # do some AFM files contain 'Capheight'? -JKS
        'XHeight': _to_float,
        'Ascender': _to_float,
        'Descender': _to_float,
        'StdHW': _to_float,
        'StdVW': _to_float,
        'StartCharMetrics': _to_int,
        'CharacterSet': _to_str,
        'Characters': _to_int,
        }

    d = {}
    while 1:
        line = fh.readline()
        if not line: break
        line = line.rstrip()
        # A blank line carries no keyword; without this check it would
        # be reported below as an unknown keyword with an empty name.
        if not line: continue
        if line.startswith('Comment'): continue

        # The keyword is everything up to the first space; the rest of
        # the line (possibly nothing) is its value.
        lst = line.split( ' ', 1 )
        key = lst[0]
        if len( lst ) == 2:
            val = lst[1]
        else:
            val = ''

        # Look the converter up separately from running it, so that a
        # KeyError can only ever mean "unknown keyword".
        try: converter = headerConverters[key]
        except KeyError:
            print >>sys.stderr, 'Found an unknown keyword in AFM header (was %s)' % key
            continue

        try: d[key] = converter(val)
        except ValueError:
            print >>sys.stderr, 'Value error parsing header in AFM:', key, val
            continue

        # StartCharMetrics is the last header line; the char metrics
        # themselves are left unread for the caller.
        if key=='StartCharMetrics': return d
    raise RuntimeError('Bad parse')

def _parse_char_metrics(fh):
    """
    Return a character metric dictionary.  Keys are the ASCII num of
    the character, values are a (*wx*, *name*, *bbox*) tuple, where
    *wx* is the character width, *name* is the postscript language
    name, and *bbox* is a (*llx*, *lly*, *urx*, *ury*) tuple.

    This function is incomplete per the standard, but thus far parses
    all the sample afm files tried.
    """

    ascii_d = {}
    name_d  = {}
    while 1:
        line = fh.readline()
        if not line: break
        line = line.rstrip()
        if line.startswith('EndCharMetrics'): return ascii_d, name_d
        vals = line.split(';')[:4]
        if len(vals) !=4 : raise RuntimeError('Bad char metrics line: %s' % line)
        num = _to_int(vals[0].split()[1])
        wx = _to_float(vals[1].split()[1])
        name = vals[2].split()[1]
        bbox = _to_list_of_floats(vals[3][2:])
        bbox = map(int, bbox)
        # Workaround: If the character name is 'Euro', give it the corresponding
        # character code, according to WinAnsiEncoding (see PDF Reference).
        if name == 'Euro':
            num = 128
        if num != -1:
            ascii_d[num] = (wx, name, bbox)
        name_d[name] = (wx, bbox)
    raise RuntimeError('Bad parse')

def _parse_kern_pairs(fh):
    """
    Return a kern pairs dictionary; keys are (*char1*, *char2*) tuples and
    values are the kern pair value.  For example, a kern pairs line like
    ``KPX A y -50``

    will be represented as::

      d[ ('A', 'y') ] = -50

    """

    line = fh.readline()
    if not line.startswith('StartKernPairs'):
        raise RuntimeError('Bad start of kern pairs data: %s'%line)

    d = {}
    while 1:
        line = fh.readline()
        if not line: break
        line = line.rstrip()
        if len(line)==0: continue
        if line.startswith('EndKernPairs'):
            fh.readline()  # EndKernData
            return d
        vals = line.split()
        if len(vals)!=4 or vals[0]!='KPX':
            raise RuntimeError('Bad kern pairs line: %s'%line)
        c1, c2, val = vals[1], vals[2], _to_float(vals[3])
        d[(c1,c2)] = val
    raise RuntimeError('Bad kern pairs parse')

def _parse_composites(fh):
    """
    Return a composites dictionary.  Keys are the names of the
    composites.  Values are a num parts list of composite information,
    with each element being a (*name*, *dx*, *dy*) tuple.  Thus a
    composites line reading:

      CC Aacute 2 ; PCC A 0 0 ; PCC acute 160 170 ;

    will be represented as::

      d['Aacute'] = [ ('A', 0, 0), ('acute', 160, 170) ]

    """
    d = {}
    while 1:
        line = fh.readline()
        if not line: break
        line = line.rstrip()
        if len(line)==0: continue
        if line.startswith('EndComposites'):
            return d
        vals = line.split(';')
        cc = vals[0].split()
        name, numParts = cc[1], _to_int(cc[2])
        pccParts = []
        for s in vals[1:-1]:
            pcc = s.split()
            name, dx, dy = pcc[1], _to_float(pcc[2]), _to_float(pcc[3])
            pccParts.append( (name, dx, dy) )
        d[name] = pccParts

    raise RuntimeError('Bad composites parse')

def _parse_optional(fh):
    """
    Parse the optional kern pair and composites sections that may
    follow the char metrics, reading *fh* to its end.

    Return value is a (*kernDict*, *compositeDict*) tuple holding the
    results of :func:`_parse_kern_pairs` and
    :func:`_parse_composites`; a section that does not occur in the
    file is represented by an empty dict.
    """
    # Maps the keyword that opens a section to the parser for it.
    section_parsers = {
        'StartKernData' : _parse_kern_pairs,
        'StartComposites' :  _parse_composites,
        }

    parsed = {'StartKernData':{}, 'StartComposites':{}}
    while True:
        raw = fh.readline()
        if not raw:
            break
        tokens = raw.split()
        if not tokens:
            # blank (or whitespace-only) line
            continue

        keyword = tokens[0]
        section_parser = section_parsers.get(keyword)
        if section_parser is not None:
            # The section parser reads on from here through the end of
            # its section.
            parsed[keyword] = section_parser(fh)

    return ( parsed['StartKernData'], parsed['StartComposites'] )

def parse_afm(fh):
    """
    Parse the Adobe Font Metrics file in file handle *fh*. Return value
    is a (*dhead*, *dcmetrics_ascii*, *dcmetrics_name*, *dkernpairs*,
    *dcomposite*) tuple where

      * *dhead* is a :func:`_parse_header` dict,

      * *dcmetrics_ascii* and *dcmetrics_name* are the two dicts
        returned by :func:`_parse_char_metrics`, keyed by character
        code and by character name respectively,

      * *dkernpairs* is a :func:`_parse_kern_pairs` dict (possibly {}),

      * *dcomposite* is a :func:`_parse_composites` dict (possibly {})

    :exc:`RuntimeError` is raised if *fh* does not look like an AFM
    file or cannot be parsed.
    """
    _sanity_check(fh)
    header = _parse_header(fh)
    metrics_by_code, metrics_by_name = _parse_char_metrics(fh)
    kern_pairs, composites = _parse_optional(fh)
    return header, metrics_by_code, metrics_by_name, kern_pairs, composites


class AFM:
    """
    Font metrics read from an Adobe Font Metrics file.

    The header, the per-character metrics (keyed both by character
    code and by character name), the kerning pairs and the composites
    returned by :func:`parse_afm` are stored on the instance and made
    available through the methods below.
    """

    def __init__(self, fh):
        """
        Parse the AFM file in file object *fh*
        """
        (dhead, dcmetrics_ascii, dcmetrics_name, dkernpairs, dcomposite) = \
            parse_afm(fh)
        self._header = dhead
        self._kern = dkernpairs
        self._metrics = dcmetrics_ascii
        self._metrics_by_name = dcmetrics_name
        self._composite = dcomposite


    def get_bbox_char(self, c, isord=False):
        """
        Return the bounding box stored for character *c*.  *c* is a
        single character, or its integer code if *isord* is true.
        Raises :exc:`KeyError` if the font has no metrics for it.
        """
        if not isord: c=ord(c)
        wx, name, bbox = self._metrics[c]
        return bbox

    def string_width_height(self, s):
        """
        Return the string width (including kerning) and string height
        as a (*w*, *h*) tuple.

        Characters are looked up by character code, so a character
        without metrics raises :exc:`KeyError`.  Newlines are skipped.
        An empty string gives (0, 0).
        """
        if not len(s): return 0,0
        totalw = 0
        namelast = None
        miny = 1e9
        maxy = 0
        for c in s:
            if c == '\n': continue
            wx, name, bbox = self._metrics[ord(c)]
            l,b,w,h = bbox

            # find the width with kerning; pairs without an entry in
            # the kerning table contribute nothing
            kp = self._kern.get( (namelast, name), 0 )
            totalw += wx + kp

            # find the max y
            # NOTE(review): the parser documents bbox as (llx, lly,
            # urx, ury), which would make b+h equal lly+ury rather than
            # the top edge -- confirm this is intended
            thismax = b+h
            if thismax>maxy: maxy = thismax

            # find the min y
            thismin = b
            if thismin<miny: miny = thismin

            # remember this character so that the next one is kerned
            # against it
            namelast = name

        return totalw, maxy-miny

    def get_str_bbox_and_descent(self, s):
        """
        Return the string bounding box and descent as a
        (*left*, *miny*, *width*, *height*, *descent*) tuple, where
        *width* includes kerning and *descent* is ``-miny``.

        Characters are mapped to type1 names through ``uni2type1``;
        a character with no mapping, or whose name has no metrics in
        this font, is measured as 'question'.  Newlines are skipped.
        An empty string gives (0, 0, 0, 0, 0).
        """
        # five values, like every other return from this method, so
        # that callers can always unpack the result the same way
        if not len(s): return 0,0,0,0,0
        totalw = 0
        namelast = None
        miny = 1e9
        maxy = 0
        left = 0
        if not isinstance(s, unicode):
            s = s.decode()
        for c in s:
            if c == '\n': continue
            name = uni2type1.get(ord(c), 'question')
            try:
                wx, bbox = self._metrics_by_name[name]
            except KeyError:
                name = 'question'
                wx, bbox = self._metrics_by_name[name]
            l,b,w,h = bbox
            if l<left: left = l

            # find the width with kerning; pairs without an entry in
            # the kerning table contribute nothing
            kp = self._kern.get( (namelast, name), 0 )
            totalw += wx + kp

            # find the max y
            # NOTE(review): see string_width_height -- b+h may not be
            # the top edge for an (llx, lly, urx, ury) bbox; confirm
            thismax = b+h
            if thismax>maxy: maxy = thismax

            # find the min y
            thismin = b
            if thismin<miny: miny = thismin

            # remember this character so that the next one is kerned
            # against it
            namelast = name

        return left, miny, totalw, maxy-miny, -miny


    def get_str_bbox(self, s):
        """
        Return the string bounding box: the first four values of
        :meth:`get_str_bbox_and_descent`
        """
        return self.get_str_bbox_and_descent(s)[:4]

    def get_name_char(self, c, isord=False):
        """
        Get the name of the character, ie, ';' is 'semicolon'
        """
        if not isord: c=ord(c)
        wx, name, bbox = self._metrics[c]
        return name

    def get_width_char(self, c, isord=False):
        """
        Get the width of the character from the character metric WX
        field
        """
        if not isord: c=ord(c)
        wx, name, bbox = self._metrics[c]
        return wx

    def get_width_from_char_name(self, name):
        """
        Get the width of the character from a type1 character name
        """
        wx, bbox = self._metrics_by_name[name]
        return wx

    def get_height_char(self, c, isord=False):
        """
        Get the height of character *c* from the bounding box (the
        last bounding box value is returned).  This is the ink height
        (space is 0)
        """
        if not isord: c=ord(c)
        wx, name, bbox = self._metrics[c]
        return bbox[-1]

    def get_kern_dist(self, c1, c2):
        """
        Return the kerning pair distance (possibly 0) for chars *c1*
        and *c2*
        """
        name1, name2 = self.get_name_char(c1), self.get_name_char(c2)
        return self.get_kern_dist_from_name(name1, name2)

    def get_kern_dist_from_name(self, name1, name2):
        """
        Return the kerning pair distance (possibly 0) for chars
        *name1* and *name2*
        """
        # a pair missing from the kerning table is simply not kerned
        return self._kern.get( (name1, name2), 0 )

    def get_fontname(self):
        "Return the font name, eg, 'Times-Roman'"
        return self._header['FontName']

    def get_fullname(self):
        "Return the font full name, eg, 'Times-Roman'"
        name = self._header.get('FullName')
        if name is None: # use FontName as a substitute
            name = self._header['FontName']
        return name

    def get_familyname(self):
        "Return the font family name, eg, 'Times'"
        name = self._header.get('FamilyName')
        if name is not None:
            return name

        # FamilyName not specified so we'll make a guess: take the
        # full name and strip any trailing style/weight words
        name = self.get_fullname()
        extras = r'(?i)([ -](regular|plain|italic|oblique|bold|semibold|light|ultralight|extra|condensed))+$'
        return re.sub(extras, '', name)

    def get_weight(self):
        "Return the font weight, eg, 'Bold' or 'Roman'"
        return self._header['Weight']

    def get_angle(self):
        "Return the fontangle as float"
        return self._header['ItalicAngle']

    def get_capheight(self):
        "Return the cap height as float"
        return self._header['CapHeight']

    def get_xheight(self):
        "Return the xheight as float"
        return self._header['XHeight']

    def get_underline_thickness(self):
        "Return the underline thickness (an int, as parsed from the header)"
        return self._header['UnderlineThickness']

    def get_horizontal_stem_width(self):
        """
        Return the standard horizontal stem width as float, or *None* if
        not specified in AFM file.
        """
        return self._header.get('StdHW', None)

    def get_vertical_stem_width(self):
        """
        Return the standard vertical stem width as float, or *None* if
        not specified in AFM file.
        """
        return self._header.get('StdVW', None)


if __name__=='__main__':
    # Smoke test: parse every file found in a directory of AFM files
    # and measure a sample string with each font.  Nothing is printed;
    # the run succeeds if no exception is raised.
    #pathname = '/usr/local/lib/R/afm/'
    pathname = '/usr/local/share/fonts/afms/adobe'

    for fname in os.listdir(pathname):
        fh = open(os.path.join(pathname,fname))
        # close each file as soon as it has been parsed, even if
        # parsing fails, rather than leaving the handles open
        try:
            afm = AFM(fh)
        finally:
            fh.close()
        w,h =  afm.string_width_height('John Hunter is the Man!')
# python-matplotlib 1.1.1~rc1+git20120423-0ubuntu1 / usr / share / pyshared / matplotlib / afm.py