/usr/share/octave/packages/statistics-1.3.0/jackknife.m is in octave-statistics 1.3.0-4.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | ## Copyright (C) 2011 Alexander Klein <alexander.klein@math.uni-giessen.de>
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.
## -*- texinfo -*-
## @deftypefn{Function File} {@var{jackstat} =} jackknife (@var{E}, @var{x}, @dots{})
## Compute jackknife estimates of a parameter taking one or more given samples as parameters.
## In particular, @var{E} is the estimator to be jackknifed as a function name, handle,
## or inline function, and @var{x} is the sample for which the estimate is to be taken.
## The @var{i}-th entry of @var{jackstat} will contain the value of the estimator
## on the sample @var{x} with its @var{i}-th row omitted.
##
## @example
## @group
## jackstat(@var{i}) = @var{E}(@var{x}(1 : @var{i} - 1, @var{i} + 1 : length(@var{x})))
## @end group
## @end example
##
## Depending on the number of samples to be used, the estimator must have the appropriate form:
## If only one sample is used, then the estimator need not be concerned with cell arrays,
## for example jackknifing the standard deviation of a sample can be performed with
## @code{@var{jackstat} = jackknife (@@std, rand (100, 1))}.
## If, however, more than one sample is to be used, the samples must all be of equal size,
## and the estimator must address them as elements of a cell-array,
## in which they are aggregated in their order of appearance:
##
## @example
## @group
## @var{jackstat} = jackknife(@@(x) std(x@{1@})/var(x@{2@}), rand (100, 1), randn (100, 1)
## @end group
## @end example
##
## If all goes well, a theoretical value @var{P} for the parameter is already known,
## @var{n} is the sample size,
## @code{@var{t} = @var{n} * @var{E}(@var{x}) - (@var{n} - 1) * mean(@var{jackstat})}, and
## @code{@var{v} = sumsq(@var{n} * @var{E}(@var{x}) - (@var{n} - 1) * @var{jackstat} - @var{t}) / (@var{n} * (@var{n} - 1))}, then
## @code{(@var{t}-@var{P})/sqrt(@var{v})} should follow a t-distribution with @var{n}-1 degrees of freedom.
##
## Jackknifing is a well known method to reduce bias; further details can be found in:
## @itemize @bullet
## @item Rupert G. Miller: The jackknife-a review; Biometrika (1974) 61(1): 1-15; doi:10.1093/biomet/61.1.1
## @item Rupert G. Miller: Jackknifing Variances; Ann. Math. Statist. Volume 39, Number 2 (1968), 567-582; doi:10.1214/aoms/1177698418
## @item M. H. Quenouille: Notes on Bias in Estimation; Biometrika Vol. 43, No. 3/4 (Dec., 1956), pp. 353-360; doi:10.1093/biomet/43.3-4.353
## @end itemize
## @end deftypefn
## Author: Alexander Klein <alexander.klein@math.uni-giessen.de>
## Created: 2011-11-25
function jackstat = jackknife ( anEstimator, varargin )
## Convert function name to handle if necessary, or throw
## an error.
if ( !strcmp ( typeinfo ( anEstimator ), "function handle" ) )
if ( isascii ( anEstimator ) )
anEstimator = str2func ( anEstimator );
else
error ( "Estimators must be passed as function names or handles!" );
end
end
## Simple jackknifing can be done with a single vector argument, and
## first and foremost with a function that does not care about
## cell-arrays.
if ( length ( varargin ) == 1 && isnumeric ( varargin { 1 } ) )
aSample = varargin { 1 };
g = length ( aSample );
jackstat = zeros ( 1, g );
for k = 1 : g
jackstat ( k ) = anEstimator ( aSample ( [ 1 : k - 1, k + 1 : g ] ) );
end
## More complicated input requires more work, however.
else
g = cellfun ( @(x) length ( x ), varargin );
if ( any ( g - g ( 1 ) ) )
error ( "All passed data must be of equal length!" );
end
g = g ( 1 );
jackstat = zeros ( 1, g );
for k = 1 : g
jackstat ( k ) = anEstimator ( cellfun ( @(x) x( [ 1 : k - 1, k + 1 : g ] ), varargin, "UniformOutput", false ) );
end
end
endfunction
%!test
%! ##Example from Quenouille, Table 1
%! d=[0.18 4.00 1.04 0.85 2.14 1.01 3.01 2.33 1.57 2.19];
%! jackstat = jackknife ( @(x) 1/mean(x), d );
%! assert ( 10 / mean(d) - 9 * mean(jackstat), 0.5240, 1e-5 );
%!demo
%! for k = 1:1000
%! x=rand(10,1);
%! s(k)=std(x);
%! jackstat=jackknife(@std,x);
%! j(k)=10*std(x) - 9*mean(jackstat);
%! end
%! figure();hist([s',j'], 0:sqrt(1/12)/10:2*sqrt(1/12))
%!demo
%! for k = 1:1000
%! x=randn(1,50);
%! y=rand(1,50);
%! jackstat=jackknife(@(x) std(x{1})/std(x{2}),y,x);
%! j(k)=50*std(y)/std(x) - 49*mean(jackstat);
%! v(k)=sumsq((50*std(y)/std(x) - 49*jackstat) - j(k)) / (50 * 49);
%! end
%! t=(j-sqrt(1/12))./sqrt(v);
%! figure();plot(sort(tcdf(t,49)),"-;Almost linear mapping indicates good fit with t-distribution.;")
|