/usr/share/pyshared/axiom/store.py

# Copyright 2008 Divmod, Inc.  See LICENSE for details
# -*- test-case-name: axiom.test -*-

"""
This module holds the Axiom Store class and related classes, such as queries.
"""

from epsilon import hotfix
hotfix.require('twisted', 'filepath_copyTo')

import time, os, itertools, warnings, sys, operator, weakref

from zope.interface import implements

from twisted.python import log
from twisted.python.failure import Failure
from twisted.python import filepath
from twisted.internet import defer
from twisted.python.reflect import namedAny
from twisted.python.util import unsignedID
from twisted.application.service import IService, IServiceCollection

from epsilon.pending import PendingEvent
from epsilon.cooperator import SchedulingService

from axiom import _schema, attributes, upgrade, _fincache, iaxiom, errors
from axiom import item
from axiom._pysqlite2 import Connection

from axiom.item import \
    _typeNameToMostRecentClass, declareLegacyItem, \
    _legacyTypes, Empowered, serviceSpecialCase, _StoreIDComparer

IN_MEMORY_DATABASE = ':memory:'

# The special storeID used to mark the store itself as the target of a
# reference.
STORE_SELF_ID = -1

tempCounter = itertools.count()

# A mapping from MetaItem instances to precomputed structures describing the
# indexes necessary for those MetaItems.  Avoiding recomputing this speeds up
# opening stores significantly.
_requiredTableIndexes = weakref.WeakKeyDictionary()

# A mapping from MetaItem instances to precomputed structures describing the
# known in-memory schema for those MetaItems.  Avoiding recomputing this speeds
# up opening stores significantly.
_inMemorySchemaCache = weakref.WeakKeyDictionary()



class NoEmptyItems(Exception):
    """You must define some attributes on every item.
    """

def _mkdirIfNotExists(dirname):
    if os.path.isdir(dirname):
        return False
    os.makedirs(dirname)
    return True

class AtomicFile(file):
    """I am a file which is moved from temporary to permanent storage when it
    is closed.

    After I'm closed, I will have a 'finalpath' property saying where I went.
    """

    implements(iaxiom.IAtomicFile)

    def __init__(self, tempname, destpath):
        """
        Create an AtomicFile.  (Note: AtomicFiles can only be opened in
        write-binary mode.)

        @param tempname: The filename to open for temporary storage.

        @param destpath: The filename to move this file to when .close() is
        called.
        """
        self._destpath = destpath
        file.__init__(self, tempname, 'w+b')

    def close(self):
        """
        Close this file and commit it to its permanent location.

        @return: a Deferred which fires when the file has been moved (and
        backed up to tertiary storage, if necessary).
        """
        now = time.time()
        try:
            file.close(self)
            _mkdirIfNotExists(self._destpath.dirname())
            self.finalpath = self._destpath
            os.rename(self.name, self.finalpath.path)
            os.utime(self.finalpath.path, (now, now))
        except:
            return defer.fail()
        return defer.succeed(self.finalpath)


    def abort(self):
        os.unlink(self.name)


_noItem = object()              # tag for optional argument to getItemByID
                                # default



def storeServiceSpecialCase(st, pups):
    """
    Adapt a store to L{IServiceCollection}.

    @param st: The L{Store} to adapt.
    @param pups: A list of L{IServiceCollection} powerups on C{st}.

    @return: An L{IServiceCollection} which has all of C{pups} as children.
    """
    if st.parent is not None:
        # If for some bizarre reason we're starting a substore's service, let's
        # just assume that its parent is running its upgraders, rather than
        # risk starting the upgrader run twice. (XXX: it *IS* possible to
        # figure out whether we need to or not, I just doubt this will ever
        # even happen in practice -- fix here if it does)
        return serviceSpecialCase(st, pups)
    if st._axiom_service is not None:
        # not new, don't add twice.
        return st._axiom_service

    collection = serviceSpecialCase(st, pups)

    st._upgradeService.setServiceParent(collection)

    if st.dbdir is not None:
        from axiom import batch
        batcher = batch.BatchProcessingControllerService(st)
        batcher.setServiceParent(collection)

    return collection


def _typeIsTotallyUnknown(typename, version):
    return ((typename not in _typeNameToMostRecentClass)
            and ((typename, version) not in _legacyTypes))



class BaseQuery:
    """
    This is the abstract base implementation of query logic shared between item
    and attribute queries.

    Note: as this is an abstract class, it doesn't *actually* implement IQuery,
    but all its subclasses must, so it is declared to.  Don't instantiate it
    directly.
    """
    # XXX: need a better convention for this sort of
    # abstract-but-provide-most-of-a-base-implementation thing. -glyph

    # How about not putting the implements(iaxiom.IQuery) here, but on
    # subclasses instead? -exarkun

    implements(iaxiom.IQuery)

    def __init__(self, store, tableClass,
                 comparison=None, limit=None,
                 offset=None, sort=None):
        """
        Create a generic object-oriented interface to SQL, used to implement
        Store.query.

        @param store: the store that this query is within.

        @param tableClass: a subclass of L{Item}.

        @param comparison: an implementor of L{iaxiom.IComparison}

        @param limit: an L{int} that limits the number of results that will be
        queried for, or None to indicate that all results should be returned.

        @param offset: an L{int} that specifies the offset within the query
        results to begin iterating from, or None to indicate that we should
        start at 0.

        @param sort: A sort order object.  Obtained by doing
        C{YourItemClass.yourAttribute.ascending} or C{.descending}.
        """

        self.store = store
        self.tableClass = tableClass
        self.comparison = comparison
        self.limit = limit
        self.offset = offset
        self.sort = iaxiom.IOrdering(sort)
        tables = self._involvedTables()
        self._computeFromClause(tables)


    _cloneAttributes = 'store tableClass comparison limit offset sort'.split()

    # IQuery
    def cloneQuery(self, limit=_noItem):
        clonekw = {}
        for attr in self._cloneAttributes:
            clonekw[attr] = getattr(self, attr)
        if limit is not _noItem:
            clonekw['limit'] = limit
        return self.__class__(**clonekw)


    def __repr__(self):
        return self.__class__.__name__ + '(' + ', '.join([
                repr(self.store),
                repr(self.tableClass),
                repr(self.comparison),
                repr(self.limit),
                repr(self.offset),
                repr(self.sort)]) + ')'


    def explain(self):
        """
        A debugging API, exposing SQLite's I{EXPLAIN} statement.

        While this is not a private method, you also probably don't have any
        use for it unless you understand U{SQLite
        opcodes<http://www.sqlite.org/opcode.html>} very well.

        Once you do, it can be handy to call this interactively to get a sense
        of the complexity of a query.

        @return: a list, the first element of which is a L{str} (the SQL
        statement which will be run), and the remainder of which is 3-tuples
        resulting from the I{EXPLAIN} of that statement.
        """
        return ([self._sqlAndArgs('SELECT', self._queryTarget)[0]] +
                self._runQuery('EXPLAIN SELECT', self._queryTarget))


    def _involvedTables(self):
        """
        Return a list of tables involved in this query,
        first checking that no required tables (those in
        the query target) have been omitted from the comparison.
        """
        # SQL and arguments
        if self.comparison is not None:
            tables = self.comparison.getInvolvedTables()
            self.args = self.comparison.getArgs(self.store)
        else:
            tables = [self.tableClass]
            self.args = []

        if self.tableClass not in tables:
            raise ValueError(
                "Comparison omits required reference to result type")

        return tables

    def _computeFromClause(self, tables):
        """
        Generate the SQL string which follows the "FROM" string and before the
        "WHERE" string in the final SQL statement.
        """
        tableAliases = []
        self.fromClauseParts = []
        for table in tables:
            # The indirect calls to store.getTableName() will create the tables
            # if needed. (XXX That's bad, actually.   They should get created
            # some other way if necessary.  -exarkun)
            tableName = table.getTableName(self.store)
            tableAlias = table.getTableAlias(self.store, tuple(tableAliases))
            if tableAlias is None:
                self.fromClauseParts.append(tableName)
            else:
                tableAliases.append(tableAlias)
                self.fromClauseParts.append('%s AS %s' % (tableName,
                                                          tableAlias))

        self.sortClauseParts = []
        for attr, direction in self.sort.orderColumns():
            assert direction in ('ASC', 'DESC'), "%r not in ASC,DESC" % (direction,)
            if attr.type not in tables:
                raise ValueError(
                    "Ordering references type excluded from comparison")
            self.sortClauseParts.append(
                '%s %s' % (attr.getColumnName(self.store), direction))


    def _sqlAndArgs(self, verb, subject):
        limitClause = []
        if self.limit is not None:
            # XXX LIMIT and OFFSET used to be using ?, but they started
            # generating syntax errors in places where generating the whole SQL
            # statement does not.  this smells like a bug in sqlite's parser to
            # me, but I don't know my SQL syntax standards well enough to be
            # sure -glyph
            if not isinstance(self.limit, (int, long)):
                raise TypeError("limit must be an integer: %r" % (self.limit,))
            limitClause.append('LIMIT')
            limitClause.append(str(self.limit))
            if self.offset is not None:
                if not isinstance(self.offset, (int, long)):
                    raise TypeError("offset must be an integer: %r" % (self.offset,))
                limitClause.append('OFFSET')
                limitClause.append(str(self.offset))
        else:
            assert self.offset is None, 'Offset specified without limit'

        sqlParts = [verb, subject]
        if self.fromClauseParts:
            sqlParts.extend(['FROM', ', '.join(self.fromClauseParts)])
        if self.comparison is not None:
            sqlParts.extend(['WHERE', self.comparison.getQuery(self.store)])
        if self.sortClauseParts:
            sqlParts.extend(['ORDER BY', ', '.join(self.sortClauseParts)])
        if limitClause:
            sqlParts.append(' '.join(limitClause))
        sqlstr = ' '.join(sqlParts)
        return (sqlstr, self.args)


    def _runQuery(self, verb, subject):
        # XXX ideally this should be creating an SQL cursor and iterating
        # through that so we don't have to load the whole query into memory,
        # but right now Store's interface to SQL is all through one cursor.
        # I'm not sure how to do this and preserve the chokepoint so that we
        # can do, e.g. transaction fallbacks.
        t = time.time()
        if not self.store.autocommit:
            self.store.checkpoint()
        sqlstr, sqlargs = self._sqlAndArgs(verb, subject)
        sqlResults = self.store.querySQL(sqlstr, sqlargs)
        cs = self.locateCallSite()
        log.msg(interface=iaxiom.IStatEvent,
                querySite=cs, queryTime=time.time() - t, querySQL=sqlstr)
        return sqlResults

    def locateCallSite(self):
        i = 3
        frame = sys._getframe(i)
        while frame.f_code.co_filename == __file__:
            #let's not get stuck in findOrCreate, etc
            i += 1
            frame = sys._getframe(i)
        return (frame.f_code.co_filename, frame.f_lineno)


    def _selectStuff(self, verb='SELECT'):
        """
        Return a generator which yields the massaged results of this query with
        a particular SQL verb.

        For an attribute query, massaged results are of the type of that
        attribute.  For an item query, they are items of the type the query is
        supposed to return.

        @param verb: a str containing the SQL verb to execute.  This really
        must be some variant of 'SELECT', the only two currently implemented
        being 'SELECT' and 'SELECT DISTINCT'.
        """
        sqlResults = self._runQuery(verb, self._queryTarget)
        for row in sqlResults:
            yield self._massageData(row)


    def _massageData(self, row):
        """
        Subclasses must override this method to 'massage' the data received
        from the database, converting it from data direct from the database
        into Python objects of the appropriate form.

        @param row: a tuple of some kind, representing an element of data
        returned from a call to sqlite.
        """
        raise NotImplementedError()


    def distinct(self):
        """
        Call this method if you want to avoid repeated results from a query.

        You can call this on either an attribute or item query.  For example,
        on an attribute query::

            X(store=s, value=1, name=u'foo')
            X(store=s, value=1, name=u'bar')
            X(store=s, value=2, name=u'baz')
            X(store=s, value=3, name=u'qux')
            list(s.query(X).getColumn('value'))
                => [1, 1, 2, 3]
            list(s.query(X).getColumn('value').distinct())
                => [1, 2, 3]

        You can also use distinct queries to eliminate duplicate results from
        joining two Item types together in a query, like so::

            x = X(store=s, value=1, name=u'hello')
            Y(store=s, other=x, ident=u'a')
            Y(store=s, other=x, ident=u'b')
            Y(store=s, other=x, ident=u'b+')
            list(s.query(X, AND(Y.other == X.storeID,
                                Y.ident.startswith(u'b'))))
                => [X(name=u'hello', value=1, storeID=1)@...,
                    X(name=u'hello', value=1, storeID=1)@...]
            list(s.query(X, AND(Y.other == X.storeID,
                                Y.ident.startswith(u'b'))).distinct())
                => [X(name=u'hello', value=1, storeID=1)@...]

        @return: an L{iaxiom.IQuery} provider whose values are distinct.
        """
        return _DistinctQuery(self)


    def __iter__(self):
        """
        Iterate the results of this query.
        """
        return self._selectStuff('SELECT')


    _selfiter = None
    def next(self):
        """
        This method is deprecated, a holdover from when queries were iterators,
        rather than iterables.

        @return: one element of massaged data.
        """
        if self._selfiter is None:
            warnings.warn(
                "Calling 'next' directly on a query is deprecated. "
                "Perhaps you want to use iter(query).next(), or something "
                "more expressive like store.findFirst or store.findOrCreate?",
                DeprecationWarning, stacklevel=2)
            self._selfiter = self.__iter__()
        return self._selfiter.next()



class _FakeItemForFilter:
    __legacy__ = False
    def __init__(self, store):
        self.store = store


def _isColumnUnique(col):
    """
    Determine if an IColumn provider is unique.

    @param col: an L{IColumn} provider
    @return: True if the IColumn provider is unique, False otherwise.
    """
    return isinstance(col, _StoreIDComparer)

class ItemQuery(BaseQuery):
    """
    This class is a query whose results will be Item instances.  This is the
    type always returned from L{Store.query}.
    """

    def __init__(self, *a, **k):
        """
        Create an ItemQuery.  This is typically done via L{Store.query}.
        """
        BaseQuery.__init__(self, *a, **k)
        self._queryTarget = (
            self.tableClass.storeID.getColumnName(self.store) + ', ' + (
                ', '.join(
                    [attrobj.getColumnName(self.store)
                     for name, attrobj in self.tableClass.getSchema()
                     ])))


    def paginate(self, pagesize=20):
        """
        Split up the work of gathering a result set into multiple smaller
        'pages', allowing very large queries to be iterated without blocking
        for long periods of time.

        While simply iterating C{paginate()} is very similar to iterating a
        query directly, using this method allows the work to obtain the results
        to be performed on demand, over a series of different transaction.

        @param pagesize: the number of results gather in each chunk of work.
        (This is mostly for testing paginate's implementation.)
        @type pagesize: L{int}

        @return: an iterable which yields all the results of this query.
        """

        sort = self.sort
        oc = list(sort.orderColumns())
        if not oc:
            # You can't have an unsorted pagination.
            sort = self.tableClass.storeID.ascending
            oc = list(sort.orderColumns())
        if len(oc) != 1:
            raise RuntimeError("%d-column sorts not supported yet with paginate" %(len(oc),))
        sortColumn = oc[0][0]
        if oc[0][1] == 'ASC':
            sortOp = operator.gt
        else:
            sortOp = operator.lt
        if _isColumnUnique(sortColumn):
            # This is the easy case.  There is never a tie to be broken, so we
            # can just remember our last value and yield from there.  Right now
            # this only happens when the column is a storeID, but hopefully in
            # the future we will have more of this.
            tiebreaker = None
        else:
            tiebreaker = self.tableClass.storeID

        tied = lambda a, b: (sortColumn.__get__(a) ==
                             sortColumn.__get__(b))
        def _AND(a, b):
            if a is None:
                return b
            return attributes.AND(a, b)

        results = list(self.store.query(self.tableClass, self.comparison,
                                        sort=sort, limit=pagesize + 1))
        while results:
            if len(results) == 1:
                # XXX TODO: reject 0 pagesize.  If the length of the result set
                # is 1, there's no next result to test for a tie with, so we
                # must be at the end, and we should just yield the result and finish.
                yield results[0]
                return
            for resultidx in range(len(results) - 1):
                # check for a tie.
                result = results[resultidx]
                nextResult = results[resultidx + 1]
                if tied(result, nextResult):
                    # Yield any ties first, in the appropriate order.
                    lastTieBreaker = tiebreaker.__get__(result)
                    # Note that this query is _NOT_ limited: currently large ties
                    # will generate arbitrarily large amounts of work.
                    trq = self.store.query(
                        self.tableClass,
                        _AND(self.comparison,
                             sortColumn == sortColumn.__get__(result)))
                    tiedResults = list(trq)
                    tiedResults.sort(key=lambda rslt: (sortColumn.__get__(result),
                                                       tiebreaker.__get__(result)))
                    for result in tiedResults:
                        yield result
                    # re-start the query here ('result' is set to the
                    # appropriate value by the inner loop)
                    break
                else:
                    yield result

            lastSortValue = sortColumn.__get__(result) # hooray namespace pollution
            results = list(self.store.query(
                    self.tableClass,
                    _AND(self.comparison,
                         sortOp(sortColumn,
                                sortColumn.__get__(result))),
                    sort=sort,
                    limit=pagesize + 1))

    def _massageData(self, row):
        """
        Convert a row into an Item instance by loading cached items or
        creating new ones based on query results.

        @param row: an n-tuple, where n is the number of columns specified by
        my item type.

        @return: an instance of the type specified by this query.
        """
        result = self.store._loadedItem(self.tableClass, row[0], row[1:])
        assert result.store is not None, "result %r has funky store" % (result,)
        return result


    def getColumn(self, attributeName, raw=False):
        """
        Get an L{iaxiom.IQuery} whose results will be values of a single
        attribute rather than an Item.

        @param attributeName: a L{str}, the name of a Python attribute, that
        describes a column on the Item subclass that this query was specified
        for.

        @return: an L{AttributeQuery} for the column described by the attribute
        named L{attributeName} on the item class that this query's results will
        be instances of.
        """
        # XXX: 'raw' is undocumented because I think it's completely unused,
        # and it's definitely untested.  It should probably be removed when
        # someone has the time. -glyph

        # Quotient POP3 server uses it.  Not that it shouldn't be removed.
        # ;) -exarkun
        attr = getattr(self.tableClass, attributeName)
        return AttributeQuery(self.store,
                              self.tableClass,
                              self.comparison,
                              self.limit,
                              self.offset,
                              self.sort,
                              attr,
                              raw)


    def count(self):
        rslt = self._runQuery(
            'SELECT',
            'COUNT(' + self.tableClass.storeID.getColumnName(self.store)
            + ')')
        assert len(rslt) == 1, 'more than one result: %r' % (rslt,)
        return rslt[0][0] or 0


    def deleteFromStore(self):
        """
        Delete all the Items which are found by this query.
        """
        #We can do this the fast way or the slow way.

        # If there's a 'deleted' callback on the Item type or 'deleteFromStore'
        # is overridden, we have to do it the slow way.
        deletedOverridden = (
            self.tableClass.deleted.im_func is not item.Item.deleted.im_func)
        deleteFromStoreOverridden = (
            self.tableClass.deleteFromStore.im_func is not
            item.Item.deleteFromStore.im_func)

        if deletedOverridden or deleteFromStoreOverridden:
            for it in self:
                it.deleteFromStore()
        else:

            # Find other item types whose instances need to be deleted
            # when items of the type in this query are deleted, and
            # remove them from the store.
            def itemsToDelete(attr):
                return attr.oneOf(self.getColumn("storeID"))

            if not item.allowDeletion(self.store, self.tableClass, itemsToDelete):
                raise errors.DeletionDisallowed(
                    'Cannot delete item; '
                    'has referents with whenDeleted == reference.DISALLOW')

            for it in item.dependentItems(self.store,
                                          self.tableClass, itemsToDelete):
                it.deleteFromStore()

            # actually run the DELETE for the items in this query.
            self._runQuery('DELETE', "")

class MultipleItemQuery(BaseQuery):
    """
    A query that returns tuples of Items from a join.
    """

    def __init__(self, *a, **k):
        """
        Create a MultipleItemQuery.  This is typically done via L{Store.query}.
        """
        BaseQuery.__init__(self, *a, **k)

        # Just in case it's some other kind of iterable.
        self.tableClass = tuple(self.tableClass)

        if len(self.tableClass) == 0:
            raise ValueError("Multiple item queries must have "
                             "at least one table class")

        targets = []

        # Later when we massage data out, we need to slice the row.
        # This records the slice lengths.
        self.schemaLengths = []

        # self.tableClass is a tuple of Item classes.
        for tableClass in self.tableClass:

            schema = tableClass.getSchema()

            # The extra 1 is oid
            self.schemaLengths.append(len(schema) + 1)

            targets.append(
                tableClass.storeID.getColumnName(self.store) + ', ' + (
                ', '.join(
                [attrobj.getColumnName(self.store)
                 for name, attrobj in schema
                 ])))

        self._queryTarget = ', '.join(targets)

    def _involvedTables(self):
        """
        Return a list of tables involved in this query,
        first checking that no required tables (those in
        the query target) have been omitted from the comparison.
        """
        # SQL and arguments
        if self.comparison is not None:
            tables = self.comparison.getInvolvedTables()
            self.args = self.comparison.getArgs(self.store)
        else:
            tables = list(self.tableClass)
            self.args = []

        for tableClass in self.tableClass:
            if tableClass not in tables:
                raise ValueError(
                    "Comparison omits required reference to result type %s"
                    % tableClass.typeName)

        return tables

    def _massageData(self, row):

        """
        Convert a row into a tuple of Item instances, by slicing it
        according to the number of columns for each instance, and then
        proceeding as for ItemQuery._massageData.

        @param row: an n-tuple, where n is the total number of columns
        specified by all the item types in this query.

        @return: a tuple of instances of the types specified by this query.
        """
        offset = 0
        resultBits = []

        for i, tableClass in enumerate(self.tableClass):
            numAttrs = self.schemaLengths[i]

            result = self.store._loadedItem(self.tableClass[i],
                                            row[offset],
                                            row[offset+1:offset+numAttrs])
            assert result.store is not None, "result %r has funky store" % (result,)
            resultBits.append(result)

            offset += numAttrs

        return tuple(resultBits)

    def count(self):
        """
        Count the number of distinct results of the wrapped query.

        @return: an L{int} representing the number of distinct results.
        """
        if not self.store.autocommit:
            self.store.checkpoint()
        target = ', '.join([
            tableClass.storeID.getColumnName(self.store)
            for tableClass in self.tableClass ])
        sql, args = self._sqlAndArgs('SELECT', target)
        sql = 'SELECT COUNT(*) FROM (' + sql + ')'
        result = self.store.querySQL(sql, args)
        assert len(result) == 1, 'more than one result: %r' % (result,)
        return result[0][0] or 0

    def distinct(self):
        """
        @return: an L{iaxiom.IQuery} provider whose values are distinct.
        """
        return _MultipleItemDistinctQuery(self)

class _DistinctQuery(object):
    """
    A query for results excluding duplicates.

    Results from this query depend on the query it was initialized with.
    """
    implements(iaxiom.IQuery)

    def __init__(self, query):
        """
        Create a distinct query, based on another query.

        @param query: an instance of a L{BaseQuery} subclass.  Note: an IQuery
        provider is not sufficient, this class relies on implementation details
        of L{BaseQuery}.
        """
        self.query = query
        self.store = query.store
        self.limit = query.limit


    def cloneQuery(self, limit=_noItem):
        """
        Clone the original query which this distinct query wraps, and return a new
        wrapper around that clone.
        """
        newq = self.query.cloneQuery(limit=limit)
        return self.__class__(newq)


    def __iter__(self):
        """
        Iterate the distinct results of the wrapped query.

        @return: a generator which yields distinct values from its delegate
        query, whether they are items or attributes.
        """
        return self.query._selectStuff('SELECT DISTINCT')


    def count(self):
        """
        Count the number of distinct results of the wrapped query.

        @return: an L{int} representing the number of distinct results.
        """
        if not self.query.store.autocommit:
            self.query.store.checkpoint()
        sql, args = self.query._sqlAndArgs(
            'SELECT DISTINCT',
            self.query.tableClass.storeID.getColumnName(self.query.store))
        sql = 'SELECT COUNT(*) FROM (' + sql + ')'
        result = self.query.store.querySQL(sql, args)
        assert len(result) == 1, 'more than one result: %r' % (result,)
        return result[0][0] or 0


class _MultipleItemDistinctQuery(_DistinctQuery):
    """
    Distinct query based on a MultipleItemQuery.
    """

    def count(self):
        """
        Count the number of distinct results of the wrapped query.

        @return: an L{int} representing the number of distinct results.
        """
        if not self.query.store.autocommit:
            self.query.store.checkpoint()
        target = ', '.join([
            tableClass.storeID.getColumnName(self.query.store)
            for tableClass in self.query.tableClass ])
        sql, args = self.query._sqlAndArgs(
            'SELECT DISTINCT',
            target)
        sql = 'SELECT COUNT(*) FROM (' + sql + ')'
        result = self.query.store.querySQL(sql, args)
        assert len(result) == 1, 'more than one result: %r' % (result,)
        return result[0][0] or 0


_noDefault = object()

class AttributeQuery(BaseQuery):
    """
    A query for the value of a single attribute from an item class, so as to
    load only a single value rather than an instantiating an entire item when
    the value is all that is needed.
    """
    def __init__(self,
                 store,
                 tableClass,
                 comparison=None, limit=None,
                 offset=None, sort=None,
                 attribute=None,
                 raw=False):
        BaseQuery.__init__(self, store, tableClass,
                           comparison, limit,
                           offset, sort)
        self.attribute = attribute
        self.raw = raw
        self._queryTarget = attribute.getColumnName(self.store)


    _cloneAttributes = BaseQuery._cloneAttributes + 'attribute raw'.split()


    def _massageData(self, row):
        """
        Convert a raw database row to the type described by an attribute.  For
        example, convert a database integer into an L{extime.Time} instance for
        an L{attributes.timestamp} attribute.

        @param row: a 1-tuple, containing the in-database value from my
        attribute.

        @return: a value of the type described by my attribute.
        """
        if self.raw:
            return row[0]
        return self.attribute.outfilter(row[0], _FakeItemForFilter(self.store))


    def count(self):
        """
        @return: the number of non-None values of this attribute specified by this query.
        """
        rslt = self._runQuery('SELECT', 'COUNT(%s)' % (self._queryTarget,)) or [(0,)]
        assert len(rslt) == 1, 'more than one result: %r' % (rslt,)
        return rslt[0][0]



    def sum(self):
        """
        Return the sum of all the values returned by this query.  If no results
        are specified, return None.

        Note: for non-numeric column types the result of this method will be
        nonsensical.

        @return: a number or None.
        """
        res = self._runQuery('SELECT', 'SUM(%s)' % (self._queryTarget,)) or [(0,)]
        assert len(res) == 1, "more than one result: %r" % (res,)
        dbval = res[0][0] or 0
        return self.attribute.outfilter(dbval, _FakeItemForFilter(self.store))


    def average(self):
        """
        Return the average value (as defined by the AVG implementation in the
        database) of the values specified by this query.

        Note: for non-numeric column types the result of this method will be
        nonsensical.

        @return: a L{float} representing the 'average' value of this column.
        """
        rslt = self._runQuery('SELECT', 'AVG(%s)' % (self._queryTarget,)) or [(0,)]
        assert len(rslt) == 1, 'more than one result: %r' % (rslt,)
        return rslt[0][0]


    def max(self, default=_noDefault):
        return self._functionOnTarget('MAX', default)


    def min(self, default=_noDefault):
        return self._functionOnTarget('MIN', default)


    def _functionOnTarget(self, which, default):
        rslt = self._runQuery('SELECT', '%s(%s)' %
                              (which, self._queryTarget,)) or [(None,)]
        assert len(rslt) == 1, 'more than one result: %r' % (rslt,)
        dbval = rslt[0][0]
        if dbval is None:
            if default is _noDefault:
                raise ValueError, '%s() on table with no items'%(which)
            else:
                return default
        return self.attribute.outfilter(dbval, _FakeItemForFilter(self.store))


def _storeBatchServiceSpecialCase(*args, **kwargs):
    """
    Trivial wrapper around L{batch.storeBatchServiceSpecialCase} to delay the
    import of axiom.batch, which imports the reactor, which we do not want as a
    side-effect of importing L{axiom.store} (as this would preclude selecting a
    reactor after importing this module; see #2864).
    """
    from axiom import batch
    return batch.storeBatchServiceSpecialCase(*args, **kwargs)


def _schedulerServiceSpecialCase(empowered, pups):
    """
    This function creates (or returns a previously created) L{IScheduler}
    powerup.

    If L{IScheduler} powerups were found on C{empowered}, the first of those
    is given priority.  Otherwise, a site L{Store} or a user L{Store} will
    have any pre-existing L{IScheduler} powerup associated with them (on the
    hackish cache attribute C{_schedulerService}) returned, or a new one
    created if none exists already.
    """
    from axiom.scheduler import _SiteScheduler, _UserScheduler

    # Give precedence to anything found in the store
    for pup in pups:
        return pup
    # If the empowered is a store, construct a scheduler for it.
    if isinstance(empowered, Store):
        if getattr(empowered, '_schedulerService', None) is None:
            if empowered.parent is None:
                sched = _SiteScheduler(empowered)
            else:
                sched = _UserScheduler(empowered)
            sched.setServiceParent(IService(empowered))
            empowered._schedulerService = sched
        return empowered._schedulerService
    return None


class Store(Empowered):
    """
    I am a database that Axiom Items can be stored in.

    Store an item in me by setting its 'store' attribute to be me.

    I can be created one of two ways::

        Store()                      # Create an in-memory database

        Store("/path/to/file.axiom") # create an on-disk database in the
                                     # directory /path/to/file.axiom

    @ivar typeToTableNameCache: a dictionary mapping Item subclass type objects
    to the fully-qualified sqlite table name where items of that type are
    stored.  This cache is generated from the saved schema metadata when this
    store is opened and updated when schema changes from other store objects
    (such as in other processes) are detected.

    @cvar __legacy__: an L{Item} may refer to a L{Store} via a L{reference},
    and this attribute tells the item reference system that the store itself is
    not an old version of an item; i.e. it does not need to have its upgraders
    invoked.

    @cvar storeID: an L{Item} may refer to a L{Store} via a L{reference}, and
    this attribute tells the item reference system that the L{Store} has a
    special ID that to use (which is never allocated to any item).
    """

    aggregateInterfaces = {
        IService: storeServiceSpecialCase,
        IServiceCollection: storeServiceSpecialCase,
        iaxiom.IBatchService: _storeBatchServiceSpecialCase,
        iaxiom.IScheduler: _schedulerServiceSpecialCase}

    implements(iaxiom.IBeneficiary)

    transaction = None          # set of objects changed in the current transaction
    touched = None              # set of objects changed since the last checkpoint

    databaseName = 'main'       # can differ if database is attached to another
                                # database.

    dbdir = None # FilePath to the Axiom database directory, or None for
                 # in-memory Stores.
    filesdir = None # FilePath to the filesystem-storage subdirectory of the
                    # database directory, or None for in-memory Stores.

    store = property(lambda self: self) # I have a 'store' attribute because I
                                        # am 'stored' within myself; this is
                                        # also for references to use.


    # Counter indicating things are going on which disallows changes to the
    # database.  Callbacks dispatched to application code while this is
    # non-zero will reject database changes with a ChangeRejected exception.
    _rejectChanges = 0

    # The following method and attributes are the ad-hoc interface required as
    # targets of attributes.reference attributes.  (In other words, the store
    # is a little bit like a fake item.)  These should probably eventually be
    # on an interface somewhere, and be better named.

    def _currentlyValidAsReferentFor(self, store):
        """
        Check to see if this store is currently valid as a target of a
        reference from an item in the given L{Store}.  This is true iff the
        given L{Store} is this L{Store}.

        @param store: the store that the referring item is present in.

        @type store: L{Store}
        """
        if store is self:
            return True
        else:
            return False

    __legacy__ = False

    storeID = STORE_SELF_ID


    def __init__(self, dbdir=None, filesdir=None, debug=False, parent=None, idInParent=None):
        """
        Create a store.

        @param dbdir: A L{FilePath} to (or name of) an existing Axiom directory, or
        directory that does not exist yet which will be created as this Store
        is instantiated.  If unspecified, this database will be kept in memory.

        @param filesdir: A L{FilePath} to (or name of) a directory to keep files in for in-memory
        stores. An exception will be raised if both this attribute and C{dbdir}
        are specified.

        @param debug: set to True if this Store should print out every SQL
        statement it sends to SQLite.

        @param parent: (internal) If this is opened using an
        L{axiom.substore.Substore}, a reference to its parent.

        @param idInParent: (internal) If this is opened using an
        L{axiom.substore.Substore}, the storeID of the item within its parent
        which opened it.

        @raises: C{ValueError} if both C{dbdir} and C{filesdir} are specified.
        """
        if parent is not None or idInParent is not None:
            assert parent is not None
            assert idInParent is not None
        self.parent = parent
        self.idInParent = idInParent
        self.debug = debug
        self.autocommit = True
        self.queryTimes = []
        self.execTimes = []

        self._inMemoryPowerups = {}

        self._attachedChildren = {} # database name => child store object

        self.statementCache = {} # non-normalized => normalized qmark SQL
                                 # statements

        self.activeTables = {}  # tables which have had items added/removed
                                # this run

        self.objectCache = _fincache.FinalizingCache()

        self.tableQueries = {}  # map typename: query string w/ storeID
                                # parameter.  a typename is a persistent
                                # database handle for what we'll call a 'FQPN',
                                # i.e. arg to namedAny.

        self.typenameAndVersionToID = {} # map database-persistent typename and
                                         # version to an oid in the types table

        self.typeToInsertSQLCache = {}
        self.typeToSelectSQLCache = {}
        self.typeToDeleteSQLCache = {}

        self.typeToTableNameCache = {}
        self.attrToColumnNameCache = {}

        self._upgradeManager = upgrade._StoreUpgrade(self)

        self._axiom_service = None


        if self.parent is None:
            self._upgradeService = SchedulingService()
        else:
            # Substores should hook into their parent, since they shouldn't
            # expect to have their own substore service started.
            self._upgradeService = self.parent._upgradeService


        # OK!  Everything that can be set up without touching the filesystem
        # has been done.  Let's get ready to open the actual database...

        _initialOpenFailure = None
        if dbdir is None:
            self._initdb(IN_MEMORY_DATABASE)
            self._initSchema()
            self._memorySubstores = []
            if filesdir is not None:
                if not isinstance(filesdir, filepath.FilePath):
                    filesdir = filepath.FilePath(filesdir)
                self.filesdir = filesdir
                if not self.filesdir.isdir():
                    self.filesdir.makedirs()
                    self.filesdir.child("temp").createDirectory()
        else:
            if filesdir is not None:
                raise ValueError("Only one of dbdir and filesdir"
                                 " may be specified")
            if not isinstance(dbdir, filepath.FilePath):
                dbdir = filepath.FilePath(dbdir)
                # required subdirs: files, temp, run
                # datafile: db.sqlite
            self.dbdir = dbdir
            self.filesdir = self.dbdir.child('files')

            if not dbdir.isdir():
                tempdbdir = dbdir.temporarySibling()
                tempdbdir.makedirs() # maaaaaaaybe this is a bad idea, we
                                     # probably shouldn't be doing this
                                     # automatically.
                for child in ('files', 'temp', 'run'):
                    tempdbdir.child(child).createDirectory()
                self._initdb(tempdbdir.child('db.sqlite').path)
                self._initSchema()
                self.close(_report=False)
                try:
                    tempdbdir.moveTo(dbdir)
                except:
                    _initialOpenFailure = Failure()

            try:
                self._initdb(dbdir.child('db.sqlite').path)
            except:
                if _initialOpenFailure is not None:
                    log.msg("Failed to initialize axiom database."
                            "  Possible cause of error: ")
                    log.err(_initialOpenFailure)
                raise

        self.transact(self._startup)

        # _startup may have found some things which we must now upgrade.
        if self._upgradeManager.upgradesPending:
            # Automatically upgrade when possible.
            self._upgradeComplete = PendingEvent()
            d = self._upgradeService.addIterator(self._upgradeManager.upgradeEverything())
            def logUpgradeFailure(aFailure):
                if aFailure.check(errors.ItemUpgradeError):
                    log.err(aFailure.value.originalFailure, 'Item upgrade error')
                log.err(aFailure, "upgrading %r failed" % (self,))
                return aFailure
            d.addErrback(logUpgradeFailure)
            def finishHim(resultOrFailure):
                self._upgradeComplete.callback(resultOrFailure)
                self._upgradeComplete = None
            d.addBoth(finishHim)
        else:
            self._upgradeComplete = None

        log.msg(
            interface=iaxiom.IStatEvent,
            store_opened=self.dbdir is not None and self.dbdir.path or '')

    _childCounter = 0

    def _attachChild(self, child):
        "attach a child database, returning an identifier for it"
        self._childCounter += 1
        databaseName = 'child_db_%d' % (self._childCounter,)
        self._attachedChildren[databaseName] = child
        # ATTACH DATABASE statements can't use bind paramaters, blech.
        self.executeSQL("ATTACH DATABASE '%s' AS %s" % (
                child.dbdir.child('db.sqlite').path,
                databaseName,))
        return databaseName

    attachedToParent = False

    def attachToParent(self):
        assert self.parent is not None, 'must have a parent to attach'
        assert self.transaction is None, "can't attach within a transaction"

        self.close()

        self.attachedToParent = True
        self.databaseName = self.parent._attachChild(self)
        self.connection = self.parent.connection
        self.cursor = self.parent.cursor

#     def detachFromParent(self):
#         pass


    def _initSchema(self):
        # No point in even attempting to transactionalize this:
        # every single statement is a CREATE TABLE or a CREATE
        # INDEX and those commit transactions silently anyway.
        for stmt in _schema.BASE_SCHEMA:
            self.executeSchemaSQL(stmt)


    def _startup(self):
        """
        Called during __init__.  Check consistency of schema in database with
        classes in memory.  Load all Python modules for stored items, and load
        version information for upgrader service to run later.
        """
        typesToCheck = []

        for oid, module, typename, version in self.querySchemaSQL(_schema.ALL_TYPES):
            if self.debug:
                print
                print 'SCHEMA:', oid, module, typename, version
            if typename not in _typeNameToMostRecentClass:
                try:
                    namedAny(module)
                except ValueError, err:
                    raise ImportError('cannot find module ' + module, str(err))
            self.typenameAndVersionToID[typename, version] = oid

        # Can't call this until typenameAndVersionToID is populated, since this
        # depends on building a reverse map of that.
        persistedSchema = self._loadTypeSchema()

        # Now that we have persistedSchema, loop over everything again and
        # prepare old types.
        for (typename, version), typeID in self.typenameAndVersionToID.iteritems():
            cls = _typeNameToMostRecentClass.get(typename)

            if cls is not None:
                if version != cls.schemaVersion:
                    typesToCheck.append(
                        self._prepareOldVersionOf(
                            typename, version, persistedSchema))
                else:
                    typesToCheck.append(cls)

        for cls in typesToCheck:
            self._checkTypeSchemaConsistency(cls, persistedSchema)

        # Schema is consistent!  Now, if I forgot to create any indexes last
        # time I saw this table, do it now...
        extantIndexes = self._loadExistingIndexes()
        for cls in typesToCheck:
            self._createIndexesFor(cls, extantIndexes)

        self._upgradeManager.checkUpgradePaths()


    def _loadExistingIndexes(self):
        """
        Return a C{set} of the SQL indexes which already exist in the
        underlying database.  It is important to load all of this information
        at once (as opposed to using many CREATE INDEX IF NOT EXISTS statements
        or many CREATE INDEX statements and handling the errors) to minimize
        the cost of opening a store.  Loading all the indexes at once is much
        faster than doing pretty much anything that involves doing something
        once per required index.
        """
        # Totally SQLite-specific: look up what indexes exist already in
        # sqlite_master so we can skip trying to create them (which can be
        # really slow).
        return set(
            name
            for (name,) in self.querySchemaSQL(
                "SELECT name FROM *DATABASE*.sqlite_master "
                "WHERE type = 'index'"))


    def _initdb(self, dbfname):
        self.connection = Connection.fromDatabaseName(dbfname)
        self.cursor = self.connection.cursor()


    def __repr__(self):
        d = self.dbdir
        if d is None:
            d = '(in memory)'
        else:
            d = repr(d)
        return '<Store %s@0x%x>' % (d, unsignedID(self))

    def findOrCreate(self, userItemClass, __ifnew=None, **attrs):
        """
        Usage::

            s.findOrCreate(userItemClass [, function] [, x=1, y=2, ...])

        Example::

            class YourItemType(Item):
                a = integer()
                b = text()
                c = integer()

            def f(x):
                print x, \"-- it's new!\"
            s.findOrCreate(YourItemType, f, a=1, b=u'2')

        Search for an item with columns in the database that match the passed
        set of keyword arguments, returning the first match if one is found,
        creating one with the given attributes if not.  Takes an optional
        positional argument function to call on the new item if it is new.
        """
        andargs = []
        for k, v in attrs.iteritems():
            col = getattr(userItemClass, k)
            andargs.append(col == v)

        if len(andargs) == 0:
            cond = []
        elif len(andargs) == 1:
            cond = [andargs[0]]
        else:
            cond = [attributes.AND(*andargs)]

        for result in self.query(userItemClass, *cond):
            return result
        newItem = userItemClass(store=self, **attrs)
        if __ifnew is not None:
            __ifnew(newItem)
        return newItem

    def newFilePath(self, *path):
        p = self.filesdir
        for subdir in path:
            p = p.child(subdir)
        return p

    def newTemporaryFilePath(self, *path):
        p = self.dbdir.child('temp')
        for subdir in path:
            p = p.child(subdir)
        return p

    def newFile(self, *path):
        """
        Open a new file somewhere in this Store's file area.

        @param path: a sequence of path segments.

        @return: an L{AtomicFile}.
        """
        assert len(path) > 0, "newFile requires a nonzero number of segments"
        if self.dbdir is None:
            if self.filesdir is None:
                raise RuntimeError("This in-memory store has no file directory")
            else:
                tmpbase = self.filesdir
        else:
            tmpbase = self.dbdir
        tmpname = tmpbase.child('temp').child(str(tempCounter.next()) + ".tmp")
        return AtomicFile(tmpname.path, self.newFilePath(*path))

    def newDirectory(self, *path):
        p = self.filesdir
        for subdir in path:
            p = p.child(subdir)
        return p


    def _loadTypeSchema(self):
        """
        Load all of the stored schema information for all types known by this
        store.  It's important to load everything all at once (rather than
        loading the schema for each type separately as it is needed) to keep
        store opening fast.  A single query with many results is much faster
        than many queries with a few results each.

        @return: A dict with two-tuples of item type name and schema version as
            keys and lists of five-tuples of attribute schema information for
            that type.  The elements of the five-tuple are::

              - a string giving the name of the Python attribute
              - a string giving the SQL type
              - a boolean indicating whether the attribute is indexed
              - the Python attribute type object (eg, axiom.attributes.integer)
              - a string giving documentation for the attribute
        """

        # Oops, need an index going the other way.  This only happens once per
        # store open, and it's based on data queried from the store, so there
        # doesn't seem to be any broader way to cache and re-use the result.
        # However, if we keyed the resulting dict on the database typeID rather
        # than (typeName, schemaVersion), we wouldn't need the information this
        # dict gives us.  That would mean changing the callers of this function
        # to use typeID instead of that tuple, which may be possible.  Probably
        # only represents a very tiny possible speedup.
        typeIDToNameAndVersion = {}
        for key, value in self.typenameAndVersionToID.iteritems():
            typeIDToNameAndVersion[value] = key

        # Indexing attribute, ordering by it, and getting rid of row_offset
        # from the schema and the sorted() here doesn't seem to be any faster
        # than doing this.
        persistedSchema = sorted(self.querySchemaSQL(
            "SELECT attribute, type_id, sqltype, indexed, "
            "pythontype, docstring FROM *DATABASE*.axiom_attributes "))

        # This is trivially (but measurably!) faster than getattr(attributes,
        # pythontype).
        getAttribute = attributes.__dict__.__getitem__

        result = {}
        for (attribute, typeID, sqltype, indexed, pythontype,
             docstring) in persistedSchema:
            key = typeIDToNameAndVersion[typeID]
            if key not in result:
                result[key] = []
            result[key].append((
                    attribute, sqltype, indexed,
                    getAttribute(pythontype), docstring))
        return result


    def _checkTypeSchemaConsistency(self, actualType, onDiskSchema):
        """
        Called for all known types at database startup: make sure that what we
        know (in memory) about this type agrees with what is stored about this
        type in the database.

        @param actualType: A L{MetaItem} instance which is associated with a
            table in this store.  The schema it defines in memory will be
            checked against the schema known in the database to ensure they
            agree.

        @param onDiskSchema: A mapping from L{MetaItem} instances (such as
            C{actualType}) to the schema known in the database and associated
            with C{actualType}.

        @raise RuntimeError: if the schema defined by C{actualType} does not
            match the database-present schema given in C{onDiskSchema} or if
            C{onDiskSchema} contains a newer version of the schema associated
            with C{actualType} than C{actualType} represents.
        """
        # make sure that both the runtime and the database both know about this
        # type; if they don't both know, we can't check that their views are
        # consistent
        try:
            inMemorySchema = _inMemorySchemaCache[actualType]
        except KeyError:
            inMemorySchema = _inMemorySchemaCache[actualType] = [
                (storedAttribute.attrname, storedAttribute.sqltype)
                for (name, storedAttribute) in actualType.getSchema()]

        key = (actualType.typeName, actualType.schemaVersion)
        persistedSchema = [(storedAttribute[0], storedAttribute[1])
                           for storedAttribute in onDiskSchema[key]]
        if inMemorySchema != persistedSchema:
            raise RuntimeError(
                "Schema mismatch on already-loaded %r <%r> object version %d: %r != %r" %
                (actualType, actualType.typeName, actualType.schemaVersion,
                 onDiskSchema, inMemorySchema))

        if actualType.__legacy__:
            return

        if (key[0], key[1] + 1) in onDiskSchema:
            raise RuntimeError(
                "Greater versions of database %r objects in the DB than in memory" %
                (actualType.typeName,))


    # finally find old versions of the data and prepare to upgrade it.
    def _prepareOldVersionOf(self, typename, version, persistedSchema):
        """
        Note that this database contains old versions of a particular type.
        Create the appropriate dummy item subclass and queue the type to be
        upgraded.

        @param typename: The I{typeName} associated with the schema for which
            to create a dummy item class.

        @param version: The I{schemaVersion} of the old version of the schema
            for which to create a dummy item class.

        @param persistedSchema: A mapping giving information about all schemas
            stored in the database, used to create the attributes of the dummy
            item class.
        """
        appropriateSchema = persistedSchema[typename, version]
        # create actual attribute objects
        dummyAttributes = {}
        for (attribute, sqlType, indexed, pythontype,
             docstring) in appropriateSchema:
            atr = pythontype(indexed=indexed, doc=docstring)
            dummyAttributes[attribute] = atr
        dummyBases = []
        oldType = declareLegacyItem(
            typename, version, dummyAttributes, dummyBases)
        self._upgradeManager.queueTypeUpgrade(oldType)
        return oldType


    def whenFullyUpgraded(self):
        """
        Return a Deferred which fires when this Store has been fully upgraded.
        """
        if self._upgradeComplete is not None:
            return self._upgradeComplete.deferred()
        else:
            return defer.succeed(None)

    def getOldVersionOf(self, typename, version):
        return _legacyTypes[typename, version]



        # grab the schema for that version
        # look up upgraders which push it forward

    def findUnique(self, tableClass, comparison=None, default=_noItem):
        """
        Find an Item in the database which should be unique.  If it is found,
        return it.  If it is not found, return 'default' if it was passed,
        otherwise raise L{errors.ItemNotFound}.  If more than one item is
        found, raise L{errors.DuplicateUniqueItem}.

        @param comparison: implementor of L{iaxiom.IComparison}.

        @param default: value to use if the item is not found.
        """
        results = list(self.query(tableClass, comparison, limit=2))
        lr = len(results)

        if lr == 0:
            if default is _noItem:
                raise errors.ItemNotFound(comparison)
            else:
                return default
        elif lr == 2:
            raise errors.DuplicateUniqueItem(comparison, results)
        elif lr == 1:
            return results[0]
        else:
            raise AssertionError("limit=2 database query returned 3+ results: ",
                                 comparison, results)


    def findFirst(self, tableClass, comparison=None,
                  offset=None, sort=None, default=None):
        """
        Usage::

            s.findFirst(tableClass [, query arguments except 'limit'])

        Example::

            class YourItemType(Item):
                a = integer()
                b = text()
                c = integer()
            ...
            it = s.findFirst(YourItemType,
                             AND(YourItemType.a == 1,
                                 YourItemType.b == u'2'),
                                 sort=YourItemType.c.descending)

        Search for an item with columns in the database that match the passed
        comparison, offset and sort, returning the first match if one is found,
        or the passed default (None if none is passed) if one is not found.
        """

        limit = 1
        for item in self.query(tableClass, comparison, limit, offset, sort):
            return item
        return default

    def query(self, tableClass, comparison=None,
              limit=None, offset=None, sort=None):
        """
        Return a generator of instances of C{tableClass},
        or tuples of instances if C{tableClass} is a
        tuple of classes.

        Examples::

            fastCars = s.query(Vehicle,
                axiom.attributes.AND(
                    Vehicle.wheels == 4,
                    Vehicle.maxKPH > 200),
                limit=100,
                sort=Vehicle.maxKPH.descending)

            quotesByClient = s.query( (Client, Quote),
                axiom.attributes.AND(
                    Client.active == True,
                    Quote.client == Client.storeID,
                    Quote.created >= someDate),
                limit=10,
                sort=(Client.name.ascending,
                      Quote.created.descending))

        @param tableClass: a subclass of Item to look for instances of,
        or a tuple of subclasses.

        @param comparison: a provider of L{IComparison}, or None, to match
        all items available in the store. If tableClass is a tuple, then
        the comparison must refer to all Item subclasses in that tuple,
        and specify the relationships between them.

        @param limit: an int to limit the total length of the results, or None
        for all available results.

        @param offset: an int to specify a starting point within the available
        results, or None to start at 0.

        @param sort: an L{ISort}, something that comes from an SQLAttribute's
        'ascending' or 'descending' attribute.

        @return: an L{ItemQuery} object, which is an iterable of Items or
        tuples of Items, according to tableClass.
        """
        if isinstance(tableClass, tuple):
            queryClass = MultipleItemQuery
        else:
            queryClass = ItemQuery

        return queryClass(self, tableClass, comparison, limit, offset, sort)

    def sum(self, summableAttribute, *a, **k):
        args = (self, summableAttribute.type) + a
        return AttributeQuery(attribute=summableAttribute,
                              *args, **k).sum()
    def count(self, *a, **k):
        return self.query(*a, **k).count()

    def batchInsert(self, itemType, itemAttributes, dataRows):
        """
        Create multiple items in the store without loading
        corresponding Python objects into memory.

        the items' C{stored} callback will not be called.

        Example::

            myData = [(37, u"Fred",  u"Wichita"),
                      (28, u"Jim",   u"Fresno"),
                      (43, u"Betty", u"Dubuque")]
            myStore.batchInsert(FooItem,
                                [FooItem.age, FooItem.name, FooItem.city],
                                myData)

        @param itemType: an Item subclass to create instances of.

        @param itemAttributes: an iterable of attributes on the Item subclass.

        @param dataRows: an iterable of iterables, each the same
        length as C{itemAttributes} and containing data corresponding
        to each attribute in it.

        @return: None.
        """
        class FakeItem:
            pass
        _NEEDS_DEFAULT = object() # token for lookup failure
        fakeOSelf = FakeItem()
        fakeOSelf.store = self
        sql = itemType._baseInsertSQL(self)
        indices = {}
        schema = [attr for (name, attr) in itemType.getSchema()]
        for i, attr in enumerate(itemAttributes):
            indices[attr] = i
        for row in dataRows:
            oid = self.store.executeSchemaSQL(
                _schema.CREATE_OBJECT, [self.store.getTypeID(itemType)])
            insertArgs = [oid]
            for attr in schema:
                i = indices.get(attr, _NEEDS_DEFAULT)
                if i is _NEEDS_DEFAULT:
                    pyval = attr.default
                else:
                    pyval = row[i]
                dbval = attr._convertPyval(fakeOSelf, pyval)
                insertArgs.append(dbval)
            self.executeSQL(sql, insertArgs)

    def _loadedItem(self, itemClass, storeID, attrs):
        if self.objectCache.has(storeID):
            result = self.objectCache.get(storeID)
            # XXX do checks on consistency between attrs and DB object, maybe?
        else:
            result = itemClass.existingInStore(self, storeID, attrs)
            if not result.__legacy__:
                self.objectCache.cache(storeID, result)
        return result


    def changed(self, item):
        """
        An item in this store was changed.  Add it to the current transaction's
        list of changed items, if a transaction is currently underway, or raise
        an exception if this L{Store} is currently in a state which does not
        allow changes.
        """
        if self._rejectChanges:
            raise errors.ChangeRejected()
        if self.transaction is not None:
            self.transaction.add(item)
            self.touched.add(item)


    def checkpoint(self):
        self._rejectChanges += 1
        try:
            for item in self.touched:
                # XXX: it should be possible here, using various clever hacks, to
                # automatically optimize functionally identical statements into
                # executemany.
                item.checkpoint()
            self.touched.clear()
        finally:
            self._rejectChanges -= 1

    executedThisTransaction = None
    tablesCreatedThisTransaction = None

    def transact(self, f, *a, **k):
        """
        Execute C{f(*a, **k)} in the context of a database transaction.

        Any changes made to this L{Store} by C{f} will be committed when C{f}
        returns.  If C{f} raises an exception, those changes will be reverted
        instead.

        If a transaction is already in progress (in this thread - ie, if a
        frame executing L{Store.transact} is already on the call stack), this
        will B{not} start a nested transaction.  Changes will not be committed
        until the existing transaction completes, and an exception raised by
        C{f} will not revert changes made by C{f}.  You probably don't want to
        ever call this if another transaction is in progress.

        @return: Whatever C{f(*a, **kw)} returns.
        @raise: Whatever C{f(*a, **kw)} raises, or a database exception.
        """
        if self.transaction is not None:
            return f(*a, **k)
        if self.attachedToParent:
            return self.parent.transact(f, *a, **k)
        try:
            self._begin()
            try:
                result = f(*a, **k)
                self.checkpoint()
            except:
                exc = Failure()
                try:
                    self.revert()
                except:
                    log.err(exc)
                    raise
                raise
            else:
                self._commit()
            return result
        finally:
            self._cleanupTxnState()

    # The following three methods are necessary...
    # - in PySQLite: because PySQLite has some buggy transaction handling which
    #   makes it impossible to issue explicit BEGIN statements - which we
    #   _need_ to do to provide guarantees for read/write transactions.

    def _begin(self):
        if self.debug:
            print '<'*10, 'BEGIN', '>'*10
        self.cursor.execute("BEGIN IMMEDIATE TRANSACTION")
        self._setupTxnState()

    def _setupTxnState(self):
        self.executedThisTransaction = []
        self.tablesCreatedThisTransaction = []
        if self.attachedToParent:
            self.transaction = self.parent.transaction
            self.touched = self.parent.touched
        else:
            self.transaction = set()
            self.touched = set()
        self.autocommit = False
        for sub in self._attachedChildren.values():
            sub._setupTxnState()

    def _commit(self):
        if self.debug:
            print '*'*10, 'COMMIT', '*'*10
        # self.connection.commit()
        self.cursor.execute("COMMIT")
        log.msg(interface=iaxiom.IStatEvent, stat_commits=1)
        self._postCommitHook()


    def _postCommitHook(self):
        self._rejectChanges += 1
        try:
            for committed in self.transaction:
                committed.committed()
        finally:
            self._rejectChanges -= 1


    def _rollback(self):
        if self.debug:
            print '>'*10, 'ROLLBACK', '<'*10
        # self.connection.rollback()
        self.cursor.execute("ROLLBACK")
        log.msg(interface=iaxiom.IStatEvent, stat_rollbacks=1)


    def revert(self):
        self._rollback()
        self._inMemoryRollback()


    def _inMemoryRollback(self):
        self._rejectChanges += 1
        try:
            for item in self.transaction:
                item.revert()
        finally:
            self._rejectChanges -= 1
        self.transaction.clear()
        for tableClass in self.tablesCreatedThisTransaction:
            del self.typenameAndVersionToID[tableClass.typeName,
                                            tableClass.schemaVersion]
            # Clear all cache related to this table
            for cache in (self.typeToInsertSQLCache,
                          self.typeToDeleteSQLCache,
                          self.typeToSelectSQLCache,
                          self.typeToTableNameCache) :
                if tableClass in cache:
                    del cache[tableClass]
            if tableClass.storeID in self.attrToColumnNameCache:
                del self.attrToColumnNameCache[tableClass.storeID]
            for name, attr in tableClass.getSchema():
                if attr in self.attrToColumnNameCache:
                    del self.attrToColumnNameCache[attr]

        for sub in self._attachedChildren.values():
            sub._inMemoryRollback()


    def _cleanupTxnState(self):
        self.autocommit = True
        self.transaction = None
        self.touched = None
        self.executedThisTransaction = None
        self.tablesCreatedThisTransaction = []
        for sub in self._attachedChildren.values():
            sub._cleanupTxnState()

    def close(self, _report=True):
        self.cursor.close()
        self.cursor = self.connection = None
        if self.debug and _report:
            if not self.queryTimes:
                print 'no queries'
            else:
                print 'query:', self.avgms(self.queryTimes)
            if not self.execTimes:
                print 'no execs'
            else:
                print 'exec:', self.avgms(self.execTimes)

    def avgms(self, l):
        return 'count: %d avg: %dus' % (len(l),
                                        int( (sum(l)/len(l)) * 1000000.),)

    def _indexNameOf(self, tableClass, attrname):
        """
        Return the unqualified (ie, no database name) name of the given
        attribute of the given table.

        @type tableClass: L{MetaItem}
        @param tableClass: The Python class associated with a table in the
            database.

        @param attrname: A sequence of the names of the columns of the
            indicated table which will be included in the named index.

        @return: A C{str} giving the name of the index which will index the
            given attributes of the given table.
        """
        return "axiomidx_%s_v%d_%s" % (tableClass.typeName,
                                       tableClass.schemaVersion,
                                       '_'.join(attrname))


    def _tableNameFor(self, typename, version):
        return "%s.item_%s_v%d" % (self.databaseName, typename, version)


    def getTableName(self, tableClass):
        """
        Retrieve the fully qualified name of the table holding items
        of a particular class in this store.  If the table does not
        exist in the database, it will be created as a side-effect.

        @param tableClass: an Item subclass

        @raises axiom.errors.ItemClassesOnly: if an object other than a
            subclass of Item is passed.

        @return: a string
        """
        if not (isinstance(tableClass, type) and issubclass(tableClass, item.Item)):
            raise errors.ItemClassesOnly("Only subclasses of Item have table names.")

        if tableClass not in self.typeToTableNameCache:
            self.typeToTableNameCache[tableClass] = self._tableNameFor(tableClass.typeName, tableClass.schemaVersion)
            # make sure the table exists
            self.getTypeID(tableClass)
        return self.typeToTableNameCache[tableClass]


    def getShortColumnName(self, attribute):
        """
        Retreive the column name for a particular attribute in this
        store.  The attribute must be bound to an Item subclass (its
        type must be valid). If the underlying table does not exist in
        the database, it will be created as a side-effect.

        @param tableClass: an Item subclass

        @return: a string

        XXX: The current implementation does not really match the
        description, which is actually more restrictive. But it will
        be true soon, so I guess it is ok for now.  The reason is
        that this method is used during table creation.
        """
        if isinstance(attribute, _StoreIDComparer):
            return 'oid'
        return '[' + attribute.attrname + ']'


    def getColumnName(self, attribute):
        """
        Retreive the fully qualified column name for a particular
        attribute in this store.  The attribute must be bound to an
        Item subclass (its type must be valid). If the underlying
        table does not exist in the database, it will be created as a
        side-effect.

        @param tableClass: an Item subclass

        @return: a string
        """
        if attribute not in self.attrToColumnNameCache:
            self.attrToColumnNameCache[attribute] = '.'.join(
                (self.getTableName(attribute.type),
                 self.getShortColumnName(attribute)))
        return self.attrToColumnNameCache[attribute]


    def getTypeID(self, tableClass):
        """
        Retrieve the typeID associated with a particular table in the
        in-database schema for this Store.  A typeID is an opaque integer
        representing the Item subclass, and the associated table in this
        Store's SQLite database.

        @param tableClass: a subclass of Item

        @return: an integer
        """
        key = (tableClass.typeName,
               tableClass.schemaVersion)
        if key in self.typenameAndVersionToID:
            return self.typenameAndVersionToID[key]
        return self.transact(self._maybeCreateTable, tableClass, key)


    def _maybeCreateTable(self, tableClass, key):
        """
        A type ID has been requested for an Item subclass whose table was not
        present when this Store was opened.  Attempt to create the table, and
        if that fails because another Store object (perhaps in another process)
        has created the table, re-read the schema.  When that's done, return
        the typeID.

        This method is internal to the implementation of getTypeID.  It must be
        run in a transaction.

        @param tableClass: an Item subclass
        @param key: a 2-tuple of the tableClass's typeName and schemaVersion

        @return: a typeID for the table; a new one if no table exists, or the
        existing one if the table was created by another Store object
        referencing this database.
        """
        sqlstr = []
        sqlarg = []

        # needs to be calculated including version
        tableName = self._tableNameFor(tableClass.typeName,
                                       tableClass.schemaVersion)

        sqlstr.append("CREATE TABLE %s (" % tableName)

        for nam, atr in tableClass.getSchema():
            # it's a stored attribute
            sqlarg.append("\n%s %s" %
                          (atr.getShortColumnName(self), atr.sqltype))

        if len(sqlarg) == 0:
            # XXX should be raised way earlier, in the class definition or something
            raise NoEmptyItems("%r did not define any attributes" % (tableClass,))

        sqlstr.append(', '.join(sqlarg))
        sqlstr.append(')')

        try:
            self.createSQL(''.join(sqlstr))
        except errors.TableAlreadyExists:
            # Although we don't have a memory of this table from the last time
            # we called "_startup()", another process has updated the schema
            # since then.
            self._startup()
            return self.typenameAndVersionToID[key]


        typeID = self.executeSchemaSQL(_schema.CREATE_TYPE,
                                       [tableClass.typeName,
                                        tableClass.__module__,
                                        tableClass.schemaVersion])

        self.typenameAndVersionToID[key] = typeID

        if self.tablesCreatedThisTransaction is not None:
            self.tablesCreatedThisTransaction.append(tableClass)

        # We can pass () for extantIndexes here because since the table didn't
        # exist for tableClass, none of its indexes could have either.
        # Whatever checks _createIndexesFor will make would give the same
        # result against the actual set of existing indexes as they will
        # against ().
        self._createIndexesFor(tableClass, ())

        for n, (name, storedAttribute) in enumerate(tableClass.getSchema()):
            self.executeSchemaSQL(
                _schema.ADD_SCHEMA_ATTRIBUTE,
                [typeID, n, storedAttribute.indexed, storedAttribute.sqltype,
                 storedAttribute.allowNone, storedAttribute.attrname,
                 storedAttribute.doc, storedAttribute.__class__.__name__])
            # XXX probably need something better for pythontype eventually,
            # when we figure out a good way to do user-defined attributes or we
            # start parameterizing references.

        return typeID


    def _createIndexesFor(self, tableClass, extantIndexes):
        """
        Create any indexes which don't exist and are required by the schema
        defined by C{tableClass}.

        @param tableClass: A L{MetaItem} instance which may define a schema
            which includes indexes.

        @param extantIndexes: A container (anything which can be the right-hand
            argument to the C{in} operator) which contains the unqualified
            names of all indexes which already exist in the underlying database
            and do not need to be created.
        """
        try:
            indexes = _requiredTableIndexes[tableClass]
        except KeyError:
            indexes = set()
            for nam, atr in tableClass.getSchema():
                if atr.indexed:
                    indexes.add(((atr.getShortColumnName(self),), (atr.attrname,)))
                for compound in atr.compoundIndexes:
                    indexes.add((tuple(inatr.getShortColumnName(self) for inatr in compound),
                                 tuple(inatr.attrname for inatr in compound)))
            _requiredTableIndexes[tableClass] = indexes

        # _ZOMFG_ SQL is such a piece of _shit_: you can't fully qualify the
        # table name in CREATE INDEX statements because the _INDEX_ is fully
        # qualified!

        indexColumnPrefix = '.'.join(self.getTableName(tableClass).split(".")[1:])

        for (indexColumns, indexAttrs) in indexes:
            nameOfIndex = self._indexNameOf(tableClass, indexAttrs)
            if nameOfIndex in extantIndexes:
                continue
            csql = 'CREATE INDEX %s.%s ON %s(%s)' % (
                self.databaseName, nameOfIndex, indexColumnPrefix,
                ', '.join(indexColumns))
            self.createSQL(csql)


    def getTableQuery(self, typename, version):
        if (typename, version) not in self.tableQueries:
            query = 'SELECT * FROM %s WHERE oid = ?' % (
                self._tableNameFor(typename, version), )
            self.tableQueries[typename, version] = query
        return self.tableQueries[typename, version]


    def getItemByID(self, storeID, default=_noItem, autoUpgrade=True):
        """
        Retrieve an item by its storeID, and return it.

        Note: most of the failure modes of this method are catastrophic and
        should not be handled by application code.  The only one that
        application programmers should be concerned with is KeyError.  They are
        listed for educational purposes.

        @param storeID: an L{int} which refers to the store.

        @param default: if passed, return this value rather than raising in the
        case where no Item is found.

        @raise TypeError: if storeID is not an integer.

        @raise UnknownItemType: if the storeID refers to an item row in the
        database, but the corresponding type information is not available to
        Python.

        @raise RuntimeError: if the found item's class version is higher than
        the current application is aware of.  (In other words, if you have
        upgraded a database to a new schema and then attempt to open it with a
        previous version of the code.)

        @raise KeyError: if no item corresponded to the given storeID.

        @return: an Item, or the given default, if it was passed and no row
        corresponding to the given storeID can be located in the database.
        """

        if not isinstance(storeID, (int, long)):
            raise TypeError("storeID *must* be an int or long, not %r" % (
                    type(storeID).__name__,))
        if storeID == STORE_SELF_ID:
            return self
        if self.objectCache.has(storeID):
            return self.objectCache.get(storeID)
        log.msg(interface=iaxiom.IStatEvent, stat_cache_misses=1, key=storeID)
        results = self.querySchemaSQL(_schema.TYPEOF_QUERY, [storeID])
        assert (len(results) in [1, 0]),\
            "Database panic: more than one result for TYPEOF!"
        if results:
            typename, module, version = results[0]
            # for the moment we're going to assume no inheritance
            attrs = self.querySQL(self.getTableQuery(typename, version),
                                  [storeID])
            if len(attrs) != 1:
                if default is _noItem:
                    raise errors.ItemNotFound("No results for known-to-be-good object")
                return default
            attrs = attrs[0]
            useMostRecent = False
            moreRecentAvailable = False

            # The schema may have changed since the last time I saw the
            # database.  Let's look to see if this is suspiciously broken...

            if _typeIsTotallyUnknown(typename, version):
                # Another process may have created it - let's re-up the schema
                # and see what we get.
                self._startup()

                # OK, all the modules have been loaded now, everything
                # verified.
                if _typeIsTotallyUnknown(typename, version):

                    # If there is STILL no inkling of it anywhere, we are
                    # almost certainly boned.  Let's tell the user in a
                    # structured way, at least.
                    raise errors.UnknownItemType(
                        "cannot load unknown schema/version pair: %r %r - id: %r" %
                        (typename, version, storeID))

            if typename in _typeNameToMostRecentClass:
                moreRecentAvailable = True
                mostRecent = _typeNameToMostRecentClass[typename]

                if mostRecent.schemaVersion < version:
                    raise RuntimeError("%s:%d - was found in the database and most recent %s is %d" %
                                       (typename, version, typename, mostRecent.schemaVersion))
                if mostRecent.schemaVersion == version:
                    useMostRecent = True
            if useMostRecent:
                T = mostRecent
            else:
                T = self.getOldVersionOf(typename, version)
            x = T.existingInStore(self, storeID, attrs)
            if moreRecentAvailable and (not useMostRecent) and autoUpgrade:
                # upgradeVersion will do caching as necessary, we don't have to
                # cache here.  (It must, so that app code can safely call
                # upgradeVersion and get a consistent object out of it.)
                x = self.transact(self._upgradeManager.upgradeItem, x)
            elif not x.__legacy__:
                # We loaded the most recent version of an object
                self.objectCache.cache(storeID, x)
            return x
        if default is _noItem:
            raise KeyError(storeID)
        return default


    def querySchemaSQL(self, sql, args=()):
        sql = sql.replace("*DATABASE*", self.databaseName)
        return self.querySQL(sql, args)


    def querySQL(self, sql, args=()):
        """For use with SELECT (or SELECT-like PRAGMA) statements.
        """
        if self.debug:
            result = timeinto(self.queryTimes, self._queryandfetch, sql, args)
        else:
            result = self._queryandfetch(sql, args)
        return result


    def _queryandfetch(self, sql, args):
        if self.debug:
            print '**', sql, '--', ', '.join(map(str, args))
        self.cursor.execute(sql, args)
        before = time.time()
        result = list(self.cursor)
        after = time.time()
        if after - before > 2.0:
            log.msg('Extremely long list(cursor): %s' % (after - before,))
            log.msg(sql)
            # import traceback; traceback.print_stack()
        if self.debug:
            print '  lastrow:', self.cursor.lastRowID()
            print '  result:', result
        return result


    def createSQL(self, sql, args=()):
        """
        For use with auto-committing statements such as CREATE TABLE or CREATE
        INDEX.
        """
        before = time.time()
        self._execSQL(sql, args)
        after = time.time()
        if after - before > 2.0:
            log.msg('Extremely long CREATE: %s' % (after - before,))
            log.msg(sql)
            # import traceback; traceback.print_stack()


    def _execSQL(self, sql, args):
        if self.debug:
            rows = timeinto(self.execTimes, self._queryandfetch, sql, args)
        else:
            rows = self._queryandfetch(sql, args)
        assert not rows
        return sql


    def executeSchemaSQL(self, sql, args=()):
        sql = sql.replace("*DATABASE*", self.databaseName)
        return self.executeSQL(sql, args)


    def executeSQL(self, sql, args=()):
        """
        For use with UPDATE or INSERT statements.
        """
        sql = self._execSQL(sql, args)
        result = self.cursor.lastRowID()
        if self.executedThisTransaction is not None:
            self.executedThisTransaction.append((result, sql, args))
        return result

# This isn't actually useful any more.  It turns out that the pysqlite
# documentation is confusingly worded; it's perfectly possible to create tables
# within transactions, but PySQLite's automatic transaction management (which
# we turn off) breaks that.  However, a function very much like it will be
# useful for doing nested transactions without support from the database
# itself, so I'm keeping it here commented out as an example.

#     def _reexecute(self):
#         assert self.executedThisTransaction is not None
#         self._begin()
#         for resultLastTime, sql, args in self.executedThisTransaction:
#             self._execSQL(sql, args)
#             resultThisTime = self.cursor.lastRowID()
#             if resultLastTime != resultThisTime:
#                 raise errors.TableCreationConcurrencyError(
#                     "Expected to get %s as a result "
#                     "of %r:%r, got %s" % (
#                         resultLastTime,
#                         sql, args,
#                         resultThisTime))


def timeinto(l, f, *a, **k):
    then = time.time()
    try:
        return f(*a, **k)
    finally:
        now = time.time()
        elapsed = now - then
        l.append(elapsed)

queryTimes = []
execTimes = []
python-axiom 0.6.0-4 / usr / share / pyshared / axiom / store.py