# Source file: /usr/lib/python-escript-mpi/esys/downunder/splitminimizers.py
# From Debian package python-escript-mpi 5.1-5 (installed root:root, mode 0o644).
##############################################################################
#
# Copyright (c) 2014-2017 by The University of Queensland
# http://www.uq.edu.au
#
# Primary Business: Queensland, Australia
# Licensed under the Apache License, version 2.0
# http://www.apache.org/licenses/LICENSE-2.0
#
# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
# Development 2012-2013 by School of Earth Sciences
# Development from 2014 by Centre for Geoscience Computing (GeoComp)
#
##############################################################################
from __future__ import print_function, division

import logging

import numpy as np

from esys.escript.pdetools import ArithmeticTuple
from esys.escriptcore.splitworld import Job, FunctionJob

from .minimizers import AbstractMinimizer, MinimizerMaxIterReached, \
    MinimizerIterationIncurableBreakDown
from .splitinversioncostfunctions import SplitInversionCostFunction
class SplitMinimizerLBFGS(AbstractMinimizer):
    """
    Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno
    method.

    version modified to fit with split world: the current point, gradient and
    (s, y) history live in subworld variables rather than being passed around
    as Python objects.
    """

    # History size: maximum number of (s, y) correction pairs kept for the
    # L-BFGS two-loop recursion
    _truncation = 30

    # Initial Hessian multiplier (used when the cost function does not
    # provide its own inverse Hessian approximation)
    _initial_H = 1

    # Restart the inner iteration after this many iteration steps
    _restart = 60
def getOptions(self):
return {'truncation':self._truncation,'initialHessian':self._initial_H, 'restart':self._restart}
def setOptions(self, **opts):
self.logger.debug("Setting options: %s"%(str(opts)))
for o in opts:
if o=='historySize' or o=='truncation':
self._truncation=opts[o]
elif o=='initialHessian':
self._initial_H=opts[o]
elif o=='restart':
self._restart=opts[o]
else:
raise KeyError("Invalid option '%s'"%o)
    # This function sets current_point=base_point+alpha*search_direction [m=m+p*a]
    @staticmethod
    def move_point_from_base(self, **kwargs):
        """
        Subworld job body: move the stored base point along the current
        search direction and install the result as the new current point.

        NOTE: although declared ``@staticmethod``, the first parameter
        ``self`` is the executing job (the function is handed to
        ``addJobPerWorld``), which supplies ``importValue``.

        :param kwargs: must contain ``alpha``, the step length.
        """
        m=self.importValue('base_point')        # base point m
        p=self.importValue('search_direction')  # search direction p
        a=kwargs['alpha']                       # step length alpha
        newpoint=m+p*a
        # Delegate installing the new point; presumably this exports it as
        # 'current_point' and refreshes dependent state -- confirm against
        # splitinversioncostfunctions.py.
        SplitInversionCostFunction.update_point_helper(self, newpoint)
def run(self):
"""
This version relies on the costfunction already having an initial guess loaded.
It also does not return the result, meaning a job needs to be submitted to
get the result out.
"""
# First we'll define our own versions of the helper functions
# Updates "g_Jx_new_0" and "g_Jx_new_1" to the gradient at the current point
# then returns f.dualProduct of search_direction and g_Jx_new
# Not a splitworld function
def grad_phi(f, **kwargs):
f.calculateGradient('g_Jx_0', 'g_Jx_1')
# need to call dualProduct here
def dual_p_g_Jx_new(self, **kwargs):
p=self.importValue("search_direction")
g_Jx_0=self.importValue("g_Jx_0")
g_Jx_1=self.importValue("g_Jx_1")
reg=self.importValue("regularization")
#again, this assumes that only the regularization term is relevant
res=reg.getDualProduct(p, (g_Jx_0, g_Jx_1))
self.exportValue("dp_result",res)
# Now we will only run this on one world and rely on getDouble to ship it
f.splitworld.addJob( FunctionJob, dual_p_g_Jx_new)
f.splitworld.runJobs()
res=f.splitworld.getFloatVariable("dp_result")
return res
#End of grad_phi
def _zoom(f, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2,
phi0, gphi0, IMAX=25):
"""
Helper function for `line_search` below which tries to tighten the range
alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by
J. Nocedal for an explanation.
"""
i=0
while True:
alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # should use interpolation...
f.splitworld.addJobPerWorld( FunctionJob, SplitMinimizerLBFGS.move_point_from_base, alpha=alpha, imports=['base_point', 'search_direction'])
f.splitworld.runJobs()
f.calculateValue('phi_a')
phi_a=f.splitworld.getFloatVariable('phi_a')
#zoomlogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo:
alpha_hi=alpha
else:
gphi_a=grad_phi(f)
#zoomlogger.debug("\tgrad(phi(alpha))=%e"%(gphi_a))
if np.abs(gphi_a) <= -c2*gphi0:
break
if gphi_a*(alpha_hi-alpha_lo) >= 0:
alpha_hi = alpha_lo
alpha_lo=alpha
phi_lo=phi_a
i+=1
if i>IMAX:
gphi_a=None
break
return alpha, phi_a, gphi_a
def line_search(f, alpha=1.0, alpha_truncationax=50.0,
c1=1e-4, c2=0.9, IMAX=15):
"""
Line search method that satisfies the strong Wolfe conditions.
See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation.
This version is converted from the line_search from minimizers.py
however, it takes fewer parameters because some of the values needed
by the original version will be available as subworld variables rather
than as parameters.
:param f: callable objective function f(x)
:param p: search direction
:param alpha: initial step length. If g_Jx is properly scaled alpha=1 is a
reasonable starting value.
:param alpha_truncationax: algorithm terminates if alpha reaches this value
:param c1: value for Armijo condition (see reference)
:param c2: value for curvature condition (see reference)
:param IMAX: maximum number of iterations to perform
Removed parameters (now in subworld variables instead):
x - The start value for line search: in the variable "current_point".
p - search direction: in the variable "search_direction"
g_Jx - value for the gradient of f at x: in the variables "g_Jx_0" and "g_Jx_1"
Jx - value of f(x): in the variable "Jx"
"""
# This will handle subworld side of work
def line_search_init_worker(self, **kwargs):
x=self.importValue("current_point")
p=self.importValue("search_direction")
g_Jx=ArithmeticTuple(self.importValue("g_Jx_0"), self.importValue("g_Jx_1"))
Jx=self.importValue("Jx")
regular=self.importValue("regularization")
phi0=Jx
# In the original, this part calls getDualProduct on f
# However, since that only ends up referring to the
# regularisation term, I've called that directly
# If your dual product operation requires access to
# the other models, then this step needs
# a rethink since not all models are local
gphi0=regular.getDualProduct(p, g_Jx)
#Still need to decide what this worker will do
old_phi_a=phi0
phi_a=phi0
self.exportValue("old_phi_a", old_phi_a)
self.exportValue("phi_a", phi_a)
self.exportValue("gphi0", gphi0)
self.exportValue("phi0", phi0)
self.exportValue("base_point", x) # To ensure we can revert if needed
#End of line_search_init_worker
old_alpha=0
i=1
f.splitworld.addJobPerWorld( FunctionJob, line_search_init_worker, imports=['search_direction', 'g_Jx_0', 'g_Jx_1', 'Jx', 'regularization'])
f.splitworld.runJobs()
# Updates "g_Jx_new_0" and "g_Jx_new_1" to the gradient at the current point
# then returns f.dualProduct of search_direction and g_Jx_new
# Not a splitworld function
def grad_phi(f, **kwargs):
f.calculateGradient('g_Jx_0', 'g_Jx_1')
# need to call dualProduct here
def dual_p_g_Jx_new(self, **kwargs):
p=self.importValue("search_direction")
g_Jx_0=self.importValue("g_Jx_0")
g_Jx_1=self.importValue("g_Jx_1")
reg=self.importValue("regularization")
#again, this assumes that only the regularization term is relevant
res=reg.getDualProduct(p, (g_Jx_0, g_Jx_1))
self.exportValue("dp_result",res)
# Now we will only run this on one world and rely on getDouble to ship it
f.splitworld.addJob( FunctionJob, dual_p_g_Jx_new)
f.splitworld.runJobs()
res=f.splitworld.getFloatVariable("dp_result")
return res
#End of grad_phi
while i<IMAX and alpha>0. and alpha<alpha_truncationax:
alpha_at_loop_start=alpha
f.splitworld.addJobPerWorld( FunctionJob, SplitMinimizerLBFGS.move_point_from_base, alpha=alpha, imports=['current_point', 'search_direction'])
f.splitworld.runJobs()
f.calculateValue('phi_a')
#lslogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
phi_a=f.splitworld.getFloatVariable('phi_a')
phi0=f.splitworld.getFloatVariable('phi0')
gphi0=f.splitworld.getFloatVariable('gphi0')
old_phi_a=f.splitworld.getFloatVariable('old_phi_a')
if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)):
alpha, phi_a, gphi_a = _zoom(f, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0)
break
# Need to check if alpha has changed. If it has, we need to move the point again
if alpha_at_loop_start!=alpha:
f.splitworld.addJobPerWorld( FunctionJob, SplitMinimizerLBFGS.move_point_from_base, alpha=alpha, imports=['current_point', 'search_direction'])
f.splitworld.runJobs()
gphi_a=grad_phi(f)
if np.abs(gphi_a) <= -c2*gphi0:
break
if gphi_a >= 0:
alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0)
break
old_alpha=alpha
# the factor is arbitrary as long as there is sufficient increase
alpha=2.*alpha
old_phi_a=phi_a
i+=1
#return alpha, phi_a, g_Jx_new[0]
return alpha, phi_a
#End of line_search
splitworld=self.getCostFunction().splitworld
if self.getCostFunction().provides_inverse_Hessian_approximation:
self.getCostFunction().updateHessian()
invH_scale = None
else:
invH_scale = self._initial_H
# start the iteration:
n_iter = 0
n_last_break_down=-1
non_curable_break_down = False
converged = False
self.getCostFunction().setPoint() # Set point to initial guess value (takes the place of a getArgs call)
#args=self.getCostFunction().getArguments(x)
self.getCostFunction().calculateValue("Jx") #evaluate the function and store the result in the named variable
# note that call sets Jx=Jx_original
splitworld.copyVariable("Jx", "Jx_original")
self.getCostFunction().calculateGradient("g_Jx_0","g_Jx_1") #compute the gradient and store the result
while not converged and not non_curable_break_down and n_iter < self._imax:
k=0
break_down = False
reset_s_and_y = True
# initial step length for line search
alpha=1.0
#self._doCallback(n_iter, x, Jx, g_Jx)
while not converged and not break_down and k < self._restart and n_iter < self._imax:
#self.logger.info("\033[1;31miteration %d\033[1;30m"%n_iter)
self.logger.info("********** iteration %3d **********"%n_iter)
#self.logger.info("\tJ(x) = %s"%Jx)
#self.logger.debug("\tgrad f(x) = %s"%g_Jx)
if invH_scale:
self.logger.debug("\tH = %s"%invH_scale)
splitworld.copyVariable("g_Jx_0", "old_g_Jx_0")
splitworld.copyVariable("g_Jx_1", "old_g_Jx_1")
splitworld.copyVariable("Jx", "Jx_old")
# determine search direction
self._twoLoop(self.getCostFunction().splitworld, reset_s_and_y)
reset_s_and_y = False
# determine new step length using the last one as initial value
# however, avoid using too small steps for too long.
# FIXME: This is a bit specific to esys.downunder in that the
# inverse Hessian approximation is not scaled properly (only
# the regularization term is used at the moment)...
if invH_scale is None:
if alpha <= 0.5:
alpha=2*alpha
else:
# reset alpha for the case that the cost function does not
# provide an approximation of inverse H
alpha=1.0
alpha, phi_a = line_search(self.getCostFunction(), alpha)
# this function returns a scaling alpha for the search
# direction as well as the cost function evaluation and
# gradient for the new solution approximation x_new=x+alpha*p
self.logger.debug("\tSearch direction scaling alpha=%e"%alpha)
# execute the step
# This update operation has already been done in the line_search
#delta_x = alpha*p
#x_new = x + delta_x
Jx=splitworld.getFloatVariable("Jx")
converged = True
if self._J_tol:
Jx_old=splitworld.getFloatVariable("Jx_old")
Jx_original=splitworld.getFloatVariable("Jx_original")
flag=abs(Jx-Jx_old) <= self._J_tol * abs(Jx-Jx_original)
#flag=abs(Jx_new-Jx) <= self._J_tol * abs(Jx_new-Jx_0)
if self.logger.isEnabledFor(logging.DEBUG):
if flag:
self.logger.debug("Cost function has converged: dJ, J*J_tol = %e, %e"%(Jx_old-Jx,abs(Jx-Jx_original)*self._J_tol))
else:
self.logger.debug("Cost function checked: dJ, J*J_tol = %e, %e"%(Jx_old-Jx,abs(Jx)*self._J_tol))
converged = converged and flag
if self._m_tol:
def converged_check(self, **kwargs):
alpha=kwargs["alpha"]
m_tol=kwargs["m_tol"]
reg=self.importValue("regularization")
p=self.importValue("search_direction")
delta_x=alpha*p
x=self.importValue("current_point")
norm_x = reg.getNorm(x)
norm_dx = reg.getNorm(delta_x)
flag = norm_dx <= m_tol * norm_x
#if self.logger.isEnabledFor(logging.DEBUG):
#if flag:
#self.logger.debug("Solution has converged: dx, x*m_tol = %e, %e"%(norm_dx,norm_x*self._m_tol))
#else:
#self.logger.debug("Solution checked: dx, x*m_tol = %e, %e"%(norm_dx,norm_x*self._m_tol))
self.exportValue('conv_flag', flag)
# End of converged_check
self.getCostFunction().splitworld.addJobPerWorld(FunctionJob, converged_check, alpha=alpha, m_tol=self._m_tol, imports=["regularization", "search_direction", "current_point"])
self.getCostFunction().splitworld.runJobs()
flag=self.getCostFunction().splitworld.getFloatVariable("conv_flag")>0.001
converged = converged and flag
#Already done in the line_search call
#x=x_new
if converged:
self.logger.info("\tJ(x) = %s"%Jx)
break
# unfortunately there is more work to do!
def run_worker(self, **kwargs):
break_down=False
need_trunc=kwargs["need_trunc"]
# Need to do some imports here
alpha=kwargs["alpha"]
g_Jx=ArithmeticTuple(self.importValue("g_Jx_0"), self.importValue("g_Jx_1"))
old_g_Jx=ArithmeticTuple(self.importValue("old_g_Jx_0"), self.importValue("old_g_Jx_1"))
p=self.importValue("search_direction")
reg=self.importValue("regularization")
s_and_y=self.importValue("s_and_y")
##The original code had this check
#if g_Jx_new is None:
#args=self.getCostFunction().getArguments(x_new)
#g_Jx_new=self.getCostFunction().getGradient(x_new, args)
#delta_g=g_Jx_new-g_Jx
delta_g=g_Jx-old_g_Jx
delta_x=alpha*p;
rho=reg.getDualProduct(delta_x, delta_g)
if abs(rho)>0:
s_and_y.append((delta_x,delta_g, rho ))
else:
break_down=True
if need_trunc:
s_and_y.pop(0)
self.exportValue("break_down", break_down)
self.exportValue("s_and_y",s_and_y)
# End run_worker
# Only one world has s_and_y - so its important that only single jobs be run that manipulate
# s_and_y
self.getCostFunction().splitworld.addJob( FunctionJob, run_worker, alpha=alpha, need_trunc=(k>=self._truncation))
self.getCostFunction().splitworld.runJobs()
break_down=(splitworld.getLocalObjectVariable("break_down")>0.001)
self.getCostFunction().updateHessian()
#g_Jx=g_Jx_new
#Jx=Jx_new
k+=1
n_iter+=1
#self._doCallback(n_iter, x, Jx, g_Jx)
# delete oldest vector pair
# Already done in the run_worker
#if k>self._truncation: s_and_y.pop(0)
if not self.getCostFunction().provides_inverse_Hessian_approximation and not break_down:
# set the new scaling factor (approximation of inverse Hessian)
denom=self.getCostFunction().getDualProduct(delta_g, delta_g)
if denom > 0:
invH_scale=self.getCostFunction().getDualProduct(delta_x,delta_g)/denom
else:
invH_scale=self._initial_H
self.logger.debug("** Break down in H update. Resetting to initial value %s."%self._initial_H)
# case handling for inner iteration:
if break_down:
if n_iter == n_last_break_down+1:
non_curable_break_down = True
self.logger.debug("** Incurable break down detected in step %d."%n_iter)
else:
n_last_break_down = n_iter
self.logger.debug("** Break down detected in step %d. Iteration is restarted."%n_iter)
if not k < self._restart:
self.logger.debug("Iteration is restarted after %d steps."%n_iter)
# case handling for inner iteration:
#self._result=x
self._result=None
if n_iter >= self._imax:
self.logger.warn(">>>>>>>>>> Maximum number of iterations reached! <<<<<<<<<<")
raise MinimizerMaxIterReached("Gave up after %d steps."%n_iter)
elif non_curable_break_down:
self.logger.warn(">>>>>>>>>> Incurable breakdown! <<<<<<<<<<")
raise MinimizerIterationIncurableBreakDown("Gave up after %d steps."%n_iter)
self.logger.info("Success after %d iterations!"%n_iter)
#This version does nor return the result
#You need to set up a job to extract the result
def _twoLoop(self, splitworld, reset):
"""
Helper for the L-BFGS method.
See 'Numerical Optimization' by J. Nocedal for an explanation.
This has been converted to use splitworld.
As such it doesn't return a result, instead it stores
The "p" result in the "search_direction" variable
Depends on the following splitworld variables:
g_Jx_0, g_Jx_1
current_point
s_and_y
Other conversion notes:
The current cost function's inverseHessianApproximation and
dualProduct only
depend on the regularization term so this function can be
done entirely by one world. If the Hessian/dp ever needed
other worlds, this will need to be reworked.
Implicit in the above is that the overall cost function
has cf.provides_inverse_Hessian_approximation==True
"""
# Make a fn to push the work into subworld
def two_loop_worker(self, **kwargs):
reset=kwargs['reset']
x=self.importValue("current_point")
reg=self.importValue("regularization")
g_Jx=ArithmeticTuple(self.importValue("g_Jx_0"), self.importValue("g_Jx_1"))
if reset:
s_and_y = []
self.exportValue('s_and_y', list())
else:
s_and_y=self.importValue("s_and_y")
q=g_Jx
alpha=[]
for s,y, rho in reversed(s_and_y):
a=reg.getDualProduct(s, q)/rho
alpha.append(a)
q=q-a*y
r=reg.getInverseHessianApproximationAtPoint(q)
for s,y,rho in s_and_y:
beta = reg.getDualProduct(r, y)/rho
a = alpha.pop()
r = r + s * (a-beta)
# In the original version, the caller negated the result
self.exportValue("search_direction", -r)
# Note: this is only running on world world (of possibly many)
# Any jobs which also need the variables exported here
# need to be shared or the jobs need to run on the same world
splitworld.addJob(FunctionJob, two_loop_worker, reset=reset, imports=["current_point", "regularization", "g_Jx_0", "g_Jx_1"])
splitworld.runJobs()
|