This file is indexed.

/usr/share/pyshared/pypsignifit/pygibbsit.py is in python-pypsignifit 3.0~beta.20120611.1-1build1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#!/usr/bin/env python
# vi: set ft=python sts=4 ts=4 sw=4 et:

######################################################################
#
#   See COPYING file distributed along with the psignifit package for
#   the copyright and license terms
#
######################################################################

__docformat__ = "restructuredtext"

__doc__ = """This module implements the Raftery & Lewis gibbsit program in python."""

from numpy import *
import pylab as p
from scipy import stats

__all__ = ["gibbsit"]

def recode ( U, q ):
    """recode a series of the parameter of interest to a binary series

    :Parameters:
        U       chain of samples of the parameter of interest (1d-array!)
        q       quantile of interest

    :Output:
        Z       a series that is 1 for all U[t] less than the q-quantile
                of U and that is 0 otherwise
    """
    u = p.prctile ( U, 100*q )
    return (U<=u).astype("d")

def mctest ( Z ):
    """Compare 1st order and 2nd order Markov Chain models

    :Parameters:
        Z       a binary series of 0 and 1

    :Output:
        BIC,G2
        BIC     Bayesian information criterion for the comparison between
                1st order and 2nd order Markov Chain models. If BIC<=0,
                a 1st order model should be preferred.
        G2      the corresponding likelihood ratio statistic
    """
    # Determine Transition array
    T = zeros ( (2,2,2), 'd' )
    for i in xrange (2):
        for j in xrange (2):
            for k in xrange (2):
                T[i,j,k] = ( (Z[:-2]==i) & (Z[1:-1]==j) & (Z[2:]==k) ).sum()

    # Fitted model
    fitted = zeros ( (2,2,2), 'd' )
    for i in xrange (2):
        for j in xrange (2):
            for k in xrange (2):
                fitted[i,j,k] = T[i,j,:].sum() * T[:,j,k].sum() / T[:,j,:].sum()

    # Statistics
    G2 = 2 * sum ( T*log(T/fitted) )
    BIC = G2 - 2*log(Z.shape[0]-2)
    return BIC,G2

def indtest ( Z ):
    """Compare 1st order Markov Chain and independence chain

    :Parameters:
        Z       a binary series of 0 and 1

    :Output:
        BIC,G2
        BIC     Bayesian information criterion for the comparison between
                1st order Markov Chain and an independence chain, i.e. a
                chain in which all samples are independent of each others.
                If BIC<=0, the independence chain is to be preferred.
        G2      the corresponding likelihood ratio statistic
    """
    # Determine Transition matrix
    T = zeros ( (2,2), 'd' )
    for i in xrange (2):
        for j in xrange (2):
            T[i,j] = ( (Z[:-1]==i) & (Z[1:]==j) ).sum()

    # Fitted model
    fitted = zeros ( (2,2), 'd' )
    for i in xrange (2):
        for j in xrange (2):
            fitted[i,j] = T[i,:].sum() * T[j,:].sum() / (Z.shape[0]-1)

    # Statistics
    G2 = 2 * sum ( T*log(T/fitted) )
    BIC = G2 - 2*log(Z.shape[0]-1)
    return BIC,G2

def mcest ( Z ):
    """Estimate parameters of a 1st order Markov Chain

    :Parameters:
        Z       a binary series of 0 and 1

    :Output:
        alpha,beta
        alpha   Probability to move from state 0 to state 1
        beta    Probability to move from state 1 to state 0
    """
    # Determine Transition matrix
    T = zeros ( (2,2), 'd' )
    for i in xrange (2):
        for j in xrange (2):
            T[i,j] = ( (Z[:-1]==i) & (Z[1:]==j) ).sum()

    # Calculate transition probability
    alpha = T[0,1]/T[0,:].sum()
    beta  = T[1,0]/T[1,:].sum()
    return alpha,beta

def find_index ( Z, test=mctest, kstart=1 ):
    """Find an index based on a test function

    Find a thinning k from which a particular model is preferred.
    The function starts with k=kstart and performs model comparisons
    parameterized by the parameter test for increasing k. An IndexError is
    raised if k>200.

    :Parameters:
        Z       a binary series of 0 and 1
        test    a function, that returns the BIC for the test of interest
                a first argument. Useful functions for this task are
                mctest and indtest
        kstart  starting value for thinning to be used

    :Output:
        kstop   the first k value at which BIC<=0
    """
    k = kstart
    while True:
        BIC = test ( Z[::k] )[0]
        if BIC<=0:
            return k
        if k>200:
            raise IndexError
        k += 1

class MCMCpar (dict):
    def __init__ (self, d):
        dict.__init__(self,d)
    burnin = property ( fget=lambda self: self.setdefault("M",0) )
    thin   = property ( fget=lambda self: self.setdefault("kthin",1) )
    Nsamples = property ( fget=lambda self: self.setdefault("N",self.setdefault("Nmin",1000) ) )

def gibbsit ( D=None, q=0.025, r=0.0125, s=0.95, eps=0.001 ):
    """The real gibbsit routine

    This function resembles the functionality of the gibbsit program by
    Raftery & Lewis (1995). It estimates parameters for a Markov Chain that
    should result in reasonable estimates of quantiles. These parameters
    are probabilistic in nature. Thus, they are to be taken as a good
    guess, not as exact values.

    :Parameters:
        D       a chain from a testrun of the sampling routine. If D is
                None, only the length that would be required for this chain
                is returned
        q       quantile of interest
        r       desired accuracy of the quantile of interest
        s       probability with which the estimated quantile is within
                q+/-r in the end
        eps     accuracy of estimation of the stationary density around the
                quantile

    :Output:
        params  a dictionary with the parameters for an MCMC run
    """
    # A correction parameter that is used multiple times
    phiterm = (stats.norm.ppf ( 0.5*(s+1) ) / r)**2

    params = MCMCpar({})

    if not D is None:
        Z = recode ( D, q )

        # Determine thinning
        kthin = find_index ( Z, mctest, 1 )       # Thinning required for 1st order markov chain
        kind  = find_index ( Z, indtest, kthin )  # Thinning required for independence chain

        # Estimate parameters of the 1st order markov chain ...
        alpha,beta = mcest ( Z[::kthin] )

        # Derive Quantities that have an explicit form
        ms = log ( (alpha+beta)*eps/max(alpha,beta) ) / log (1-alpha-beta)
        ns = (2-alpha-beta)*alpha*beta/(alpha+beta)**3 * phiterm
        params = MCMCpar({ "kthin": kthin, "kind": kind, "ms": ms, "M": int(ceil(ms*kthin)), "ns": ns, "N": int(ceil(ns*kthin)) })
    Nmin = q*(1-q) * phiterm
    params["Nmin"] = int(ceil(Nmin))

    return params

def main ( ):
    # Check whether we obtain similar results as the original
    D = fromfile ( "/home/ingo/tmp/mcmc_short.dat", sep=" " )
    D.shape = (-1,3)
    par = gibbsit(D[:,0])
    print par.thin,par.burnin,par.Nsamples

if __name__ == "__main__":
    main()