/usr/bin/pda-landscape is in python-mlpy 2.2.0~dfsg1-3build3.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/python
from numpy import *
from optparse import OptionParser
from mlpy import *
# Command line interface.
#
# One input file is mandatory (-d); exactly one resampling scheme must be
# chosen (-k for k-fold, or -c for monte carlo); the remaining options
# control preprocessing and the grid of Nreg values that will be scanned.
parser = OptionParser()

# Input data and optional preprocessing.
parser.add_option(
    "-d", "--data",
    dest="data", action="store", type="string", metavar="FILE",
    help="data - required")
parser.add_option(
    "-s", "--standardize",
    dest="stand", action="store_true", default=False,
    help="standardize data")
parser.add_option(
    "-n", "--normalize",
    dest="norm", action="store_true", default=False,
    help="normalize data")

# Resampling scheme.
parser.add_option(
    "-k",
    dest="k", action="store", type="int",
    help="k for k-fold cross validation")
parser.add_option(
    "-c",
    dest="c", action="store", type="int", nargs=2, metavar="SETS PAIRS",
    help="sets and pairs for monte carlo cross validation")
parser.add_option(
    "-S", "--stratified",
    dest="strat", action="store_true", default=False,
    help="for stratified cv")

# Grid of Nreg values to explore.
parser.add_option(
    "-m", "--min",
    dest="min", action="store", type="float", default=1,
    help="min value for number of regressions [default %default]")
parser.add_option(
    "-M", "--max",
    dest="max", action="store", type="float", default=20,
    help="max value for number of regressions [default %default]")
parser.add_option(
    "-p", "--steps",
    dest="steps", action="store", type="int", default=20,
    help="steps for number of regressions [default %default]")
parser.add_option(
    "-e", "--scale",
    dest="scale", action="store", type="string", default="lin",
    help="scale for number of regressions: 'lin' or 'log' [default %default]")

# Optional stability indicator computed from per-fold feature rankings.
parser.add_option(
    "-l", "--lists",
    dest="lists", action="store_true", default=False,
    help="Canberra distance indicator")

(options, args) = parser.parse_args()

# Validation. parser.error() prints the message and exits, so the order of
# the checks decides which message the user sees first.
if not options.data:
    parser.error("option -d (data) is required")

# Truthiness is used on purpose: "-k 0" counts as "not given".
use_kfold = bool(options.k)
use_montecarlo = bool(options.c)
if not use_kfold and not use_montecarlo:
    parser.error("option -k (k-fold) or -c (monte carlo) for resampling is required")
if use_kfold and use_montecarlo:
    parser.error("option -k (k-fold) and -c (monte carlo) are mutually exclusive")

if options.scale not in ("lin", "log"):
    parser.error("option -e (scale) should be 'lin' or 'log'")
# Nreg values (numbers of regressions) to scan.
# options.scale has already been restricted to 'lin' or 'log' above, so
# anything that is not 'lin' is 'log'.
# NOTE(review): assuming these are numpy's linspace/logspace, in 'log' mode
# --min/--max are base-10 exponents, not the values themselves -- confirm
# that this is the intended meaning for a number of regressions.
grid_fn = linspace if options.scale == 'lin' else logspace
Nreg = grid_fn(options.min, options.max, options.steps)
# Load the data set, then apply whichever preprocessing steps were requested.
# Standardization (if any) runs before normalization (if any).
x, y = data_fromfile(options.data)
for requested, transform in ((options.stand, data_standardize),
                             (options.norm, data_normalize)):
    if requested:
        x = transform(x)

# Report the data dimensions: first axis is reported as samples, second as
# features.
print("%s %d" % ("samples:", x.shape[0]))
print("%s %d" % ("features:", x.shape[1]))
# Build the resampling plan `res`.
# The scheme (k-fold vs. monte carlo) is chosen first, then its stratified or
# plain variant. The stratified variants receive the labels themselves, the
# plain variants only the number of samples.
if options.k:
    if options.strat:
        print("stratified %d-fold cv" % options.k)
        res = kfoldS(cl=y, sets=options.k)
    else:
        print("%d-fold cv" % options.k)
        res = kfold(nsamples=y.shape[0], sets=options.k)
elif options.c:
    # -c is declared with nargs=2, so it always carries (sets, pairs).
    n_sets, n_pairs = options.c[0], options.c[1]
    if options.strat:
        print("stratified monte carlo cv (%d sets, %d pairs)" % (n_sets, n_pairs))
        res = montecarloS(cl=y, sets=n_sets, pairs=n_pairs)
    else:
        print("monte carlo cv (%d sets, %d pairs)" % (n_sets, n_pairs))
        res = montecarlo(nsamples=y.shape[0], sets=n_sets, pairs=n_pairs)
# When the Canberra indicator is requested, prepare a one-step ranker and a
# (number of splits) x (number of features) integer table. Every row is
# overwritten for each Nreg value, so the table can be allocated once.
if options.lists:
    R = Ranking(method='onestep')
    lp = empty((len(res), x.shape[1]), dtype=int)

# Compute
# For each Nreg value: train/predict on every split of the resampling plan,
# average error and MCC over the splits, and print one summary line.
n_splits = float(len(res))
for n in Nreg:
    P = Pda(Nreg=int(n))  # fresh classifier; Nreg is truncated to an integer
    err_total = 0.0
    mcc_total = 0.0

    for split_no, split in enumerate(res):
        # Each split is indexed as (training indices, test indices).
        train_idx = split[0]
        test_idx = split[1]
        xtr = x[train_idx]
        ytr = y[train_idx]
        xts = x[test_idx]
        yts = y[test_idx]

        P.compute(xtr, ytr)
        predicted = P.predict(xts)

        if options.lists:
            # Store the argsort of this split's feature ranking.
            lp[split_no] = R.compute(xtr, ytr, P).argsort()

        err_total += err(yts, predicted)
        mcc_total += mcc(yts, predicted)

    ERR = err_total / n_splits
    MCC = mcc_total / n_splits

    # Without -l the distance column is reported as 0.0.
    DIST = canberra(lp, x.shape[1]) if options.lists else 0.0

    print("Nreg %d: error %f, mcc %f, dist %f" % (n, ERR, MCC, DIST))
|