# /usr/share/pyshared/musiclibrarian/serialize.py

# serialize.py - serialize a restricted set of Python data objects.
#
#   Copyright (C) 2003 Daniel Burrows <dburrows@debian.org>
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# The file format of ConfigParser is poorly specified and tends to be
# a lossy way to store Python values.  It's also rather unpleasant to
# put structured data in it.
#
# These routines read and write a well-defined file format which can
# losslessly represent a subset of Python objects (a plus over
# ConfigParser); they *only* handle a particular subset of Python
# objects, so they can be safely used on arbitrary input files (a plus
# over pickle -- sometimes there's such a thing as being TOO
# expressive!), and they are extensible: you can add your own object
# types and syntax.
#
# This is sort of an "s-expressions on steroids for Python".  It is
# distinguished from XML mainly in that the input files are meant to
# be read-write without the aid of a specialized editor.
#
# Note: if you pass in a file object generated by codecs.open(), the
#       underlying file will have the appropriate encoding (eg, utf8).

import re
import types

# Helpers:

# Advance a seekable file past any run of whitespace.
#
# Returns a pair (loc, c): c is the first non-whitespace character
# read (the empty string at end of file) and loc is the offset that
# f.tell() reported just before c was read, so that the caller can
# f.seek(loc) to "un-read" it.
def skip_ws(f):
    while True:
        position = f.tell()
        char = f.read(1)
        # ''.isspace() is False, so end of file also stops the scan.
        if not char.isspace():
            return position, char

# A class which can read Python expressions from files.
#
# The method "read" returns the next Python expression in the file.
#
# This class recognizes the following syntax:
#
#   PEXP ::=  <integer>
#           | <float>
#           | None | True | False
#           | <string-with-escaping>
#           | \( (PEXP ,?)* \)
#           | [ (PEXP ,?)* ]
#           | { (PEXP : PEXP ,?)* }
#           | <identifier> \( (PEXP ,?)* \)
#
# All tokens MUST be whitespace-separated.  The only exception: the
# delimiters of tuples, lists, and dictionaries are exempt from this
# requirement, as are commas.
#
# <string-with-escaping> is a string bracketed by single quotes (').
# Presently known escape sequences are \\, \', \b, and \n.  Unknown
# escape sequences are handled as in Python (the backslash is left
# in).
#
# The last case is what allows extension by plugging in a new parser.
# Currently no support is provided for cross-references, so graphs
# cannot be directly represented (but of course you can convert a
# graph to a sequence of tuples)
#
# The idea is to allow all non-executable (=safe) data types.  Of
# course, since the extensions can be general functions, the burden is
# on the extension writer to ensure their safety.
#
# A given Reader can apply to any file which supports
# seeking. (handling lookahead internally would allow this to
# generalize to any file)
#
# FIXME: handle more of the many types of integers that Python knows
# about. (mainly requires more tests when "starting" a number?)
#
# FIXME: This is dreadfully inefficient.  Could regexes be used?
#
# FIXME: Don't require a seekable file.  I only need one character of
#        lookahead, so this shouldn't be too hard. (OTOH, how often is
#        seeking necessary and how expensive is it?)
class Reader:
    # Parses expressions from a file object that supports read(1),
    # tell() and seek().  One character of lookahead is obtained by
    # remembering the offset before each read and seeking back to it.

    # Characters (besides whitespace) that terminate a numeric token;
    # the empty string stands for end of file.
    __number_stops = ('', '(', '{', '[', ']', '}', ')', ',')

    # Identifiers that denote constants rather than extension calls.
    __constants = {'None': None, 'True': True, 'False': False}

    # What follows a backslash inside a string -> the character it
    # stands for.  Anything not listed keeps its backslash.
    __escapes = {'\\': '\\', '\'': '\'', 'n': '\n', 'b': '\b'}

    # extensions maps a function name to a callable.  When the input
    # contains NAME(arg, ...), the callable registered under NAME is
    # invoked with the parsed arguments and its result is the value of
    # the expression.  The dictionary is stored by reference; when it
    # is omitted each Reader gets its own empty dictionary.
    def __init__(self, extensions = None):
        if extensions is None:
            extensions = {}
        self.extensions = extensions

    # Read one Python expression from the given file and return it.
    #
    # Raises IOError on end of file or on a token that cannot be
    # parsed; a malformed number raises ValueError (from float()).
    def read(self, file):
        # Get the first character.
        loc, c = skip_ws(file)

        # Dispatch on it.  Branches that seek back to loc re-read the
        # first character themselves; the "tail" readers expect the
        # opening delimiter to have been consumed already.
        if c == '':
            raise IOError('Unexpected end of file')
        elif ('0' <= c <= '9') or c == '-' or c == '.':
            file.seek(loc)
            return self.__read_number(file)
        elif c == '\'':
            return self.__read_string_tail(file)
        elif c == '(':
            return self.__read_tuple_tail(file)
        elif c == '[':
            return self.__read_list_tail(file)
        elif c == '{':
            return self.__read_dict_tail(file)
        elif c.isalpha():
            file.seek(loc)
            return self.__read_extension_or_constant(file)
        else:
            raise IOError('Can\'t parse token starting with \'%s\'' % c)

    # Read a numeric token starting at the current position and leave
    # the file positioned on the character that terminated it.
    def __read_number(self, file):
        first = file.read(1)
        if first == '':
            raise IOError('Unexpected end of file')

        chars = [first]
        loc = file.tell()
        c = file.read(1)
        while not (c.isspace() or c in Reader.__number_stops):
            chars.append(c)
            loc = file.tell()
            c = file.read(1)

        # Un-read the terminator so the caller sees it.
        file.seek(loc)

        s = ''.join(chars)

        # Make integers by default.
        try:
            return int(s)
        except ValueError:
            pass

        try:
            # Only reached when int() rejected the token, e.g. for a
            # literal with a trailing 'L'.
            return long(s)
        except (NameError, ValueError):
            # NameError: this interpreter has no long(); treat the
            # token like any other non-integer.
            return float(s)

    # Read the remainder of a string whose opening quote has already
    # been consumed; the closing quote is consumed too.
    def __read_string_tail(self, file):
        pieces = []

        c = file.read(1)
        while c != '\'':
            if c == '':
                raise IOError('EOF inside string')

            if c == '\\':
                c = file.read(1)
                # Unknown escapes keep the backslash.  If the file
                # ends here c is '' and the next iteration reports
                # the EOF.
                pieces.append(Reader.__escapes.get(c, '\\' + c))
            else:
                pieces.append(c)

            c = file.read(1)

        return ''.join(pieces)

    # Read expressions, optionally separated by commas, up to and
    # including the closing delimiter `closer`, and return them as a
    # list.  `what` names the construct in the end-of-file error.
    def __read_seq_tail(self, file, closer, what):
        items = []

        loc, c = skip_ws(file)
        while c != closer:
            if c == '':
                raise IOError('EOF inside ' + what)

            # c starts the next element: put it back and parse it.
            file.seek(loc)
            items.append(self.read(file))

            loc, c = skip_ws(file)
            if c == ',':
                loc, c = skip_ws(file)

        return items

    # Read the remainder of a tuple whose '(' has been consumed.
    def __read_tuple_tail(self, file):
        return tuple(self.__read_seq_tail(file, ')', 'tuple'))

    # Read the remainder of a list whose '[' has been consumed.
    def __read_list_tail(self, file):
        return self.__read_seq_tail(file, ']', 'list')

    # Read the remainder of a dictionary whose '{' has been consumed.
    def __read_dict_tail(self, file):
        result = {}

        loc, c = skip_ws(file)
        while c != '}':
            if c == '':
                raise IOError('EOF inside dictionary')

            file.seek(loc)
            key = self.read(file)

            loc, c = skip_ws(file)
            if c != ':':
                raise IOError('Parse error: expected \':\'')

            result[key] = self.read(file)

            loc, c = skip_ws(file)
            if c == ',':
                loc, c = skip_ws(file)

        return result

    # Read an identifier and return either the constant it names or
    # the result of calling the extension it names on the
    # parenthesized arguments that follow.
    def __read_extension_or_constant(self, file):
        # Get the identifier.  Assume that the first character was
        # already found to be alphabetic.
        chars = []

        loc = file.tell()
        c = file.read(1)
        while c.isalnum():
            chars.append(c)
            loc = file.tell()
            c = file.read(1)

        name = ''.join(chars)

        if name in Reader.__constants:
            # The loop above read one character past the identifier.
            # Put it back: otherwise a delimiter directly following
            # the constant (as in "[None]") would be lost.
            file.seek(loc)
            return Reader.__constants[name]

        if name not in self.extensions:
            raise IOError('Parse error: "%s" is not a function' % name)

        if c.isspace():
            loc, c = skip_ws(file)

        if c != '(':
            raise IOError('Expected \'(\' after call of function "%s"' % name)

        args = self.__read_tuple_tail(file)
        return self.extensions[name](*args)

# Converse to the above.
#
# Extensions are supported only under the condition that there is a
# simple test for their applicability.
class Writer:
    # Writes None, True, False, strings, ints, longs, floats, lists,
    # tuples and dictionaries in the syntax that Reader parses; other
    # objects are offered to the registered extensions.

    # Types handled as strings (including subclasses) and as numbers
    # (exact type only).  basestring and long are looked up
    # defensively so that the class still loads on an interpreter
    # that does not define them.
    try:
        __string_types = (basestring,)
    except NameError:
        __string_types = (str,)
    try:
        __number_types = (int, long, float)
    except NameError:
        __number_types = (int, float)

    # What to escape, and what each escaped character is written as.
    __escapere = re.compile('([\\\\\'\n\b])')
    __escapes = {'\\': '\\\\', '\'': '\\\'', '\n': '\\n', '\b': '\\b'}

    # Each extension is a tuple (applies, name, writer): applies takes
    # a Python object and returns a boolean value; if it returns True,
    # writer is called with a single argument (obj) and must return a
    # tuple holding the "arguments" to the extension, which are
    # written as name(arg, ...).  Extensions are tried in order and
    # the first one that applies wins.  The list is stored by
    # reference; when it is omitted each Writer gets its own empty
    # list.
    def __init__(self, extensions=None):
        if extensions is None:
            extensions = []
        self.extensions = extensions

    # convenience for debugging: write obj followed by a newline.
    def writeln(self, file, obj, indent=0):
        self.write(file, obj, indent)
        file.write('\n')

    # Write obj to file.  indent is the column at which obj starts; it
    # is used to line up the elements of containers.
    #
    # Raises IOError if obj cannot be serialized or if an extension
    # returns something other than a tuple.
    def write(self, file, obj, indent=0):
        t = type(obj)

        # The constants are recognized by identity: 1 == True and
        # 0 == False, so an equality test would write the numbers
        # 1, 0, 1.0 and 0.0 as booleans.
        if obj is None:
            file.write('None')
        elif obj is True:
            file.write('True')
        elif obj is False:
            file.write('False')
        elif isinstance(obj, Writer.__string_types):
            self.__write_string(file, obj)
        elif t in Writer.__number_types:
            file.write(repr(obj))
        elif t is list:
            self.__write_list(file, obj, indent)
        elif t is tuple:
            self.__write_tuple(file, obj, indent)
        elif t is dict:
            self.__write_dict(file, obj, indent)
        else:
            for test, name, writer in self.extensions:
                if test(obj):
                    output = writer(obj)

                    if type(output) is not tuple:
                        # Wrap obj so that a tuple subclass is
                        # formatted as one value.
                        raise IOError('Extension tried to write a non-tuple: %s' % (obj,))
                    file.write(name)

                    self.__write_tuple(file, output, indent + len(name))
                    return
            raise IOError('I don\'t know how to serialize %s' % repr(obj))

    # Write a string in single quotes, escaping backslash, single
    # quote, newline and backspace.
    def __write_string(self, file, obj):
        table = Writer.__escapes

        def doescape(g):
            # The pattern only matches characters present in table.
            return table[g.group(0)]

        file.write('\'')
        file.write(Writer.__escapere.sub(doescape, obj))
        file.write('\'')

    # Write the elements of obj between start and end, one element per
    # line, aligned one column to the right of the opening delimiter.
    #
    # Very aggressive about linewrapping here: perhaps I should buffer
    # things instead?
    def __write_seq(self, file, obj, indent, start, end):
        separator = ',\n' + (' ' * (indent + 1))

        file.write(start)
        for position, x in enumerate(obj):
            if position:
                file.write(separator)
            self.write(file, x, indent + 1)
        file.write(end)

    def __write_list(self, file, obj, indent):
        self.__write_seq(file, obj, indent, '[', ']')

    def __write_tuple(self, file, obj, indent):
        if len(obj) == 0:
            # '(' followed directly by the usual ',)' terminator would
            # give "(,)", which Reader cannot parse.
            file.write('()')
        else:
            self.__write_seq(file, obj, indent, '(', ',)')

    # Write a dictionary as key / ': value' line pairs.
    def __write_dict(self, file, obj, indent):
        separator = ',\n' + (' ' * (indent + 1))
        colon = '\n' + (' ' * (indent + 5)) + ': '

        # Sort the items by key, to make the dictionary 'nicer'
        items = sorted(obj.items(), key=lambda item: item[0])

        file.write('{')
        for position, (key, val) in enumerate(items):
            if position:
                file.write(separator)
            self.write(file, key, indent + 1)
            file.write(colon)
            # The value starts after the indent+5 spaces and ': '.
            self.write(file, val, indent + 7)

        file.write('}')
# musiclibrarian 1.6-2.2 / usr / share / pyshared / musiclibrarian / serialize.py