/usr/include/scythestat/optimize.h is in libscythestat-dev 1.0.2-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 | /*
* Scythe Statistical Library Copyright (C) 2000-2002 Andrew D. Martin
* and Kevin M. Quinn; 2002-present Andrew D. Martin, Kevin M. Quinn,
* and Daniel Pemstein. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify under the terms of the GNU General Public License as
* published by Free Software Foundation; either version 2 of the
* License, or (at your option) any later version. See the text files
* COPYING and LICENSE, distributed with this source code, for further
* information.
* --------------------------------------------------------------------
* scythestat/optimize.h
*
*/
/*!
* \file optimize.h
* \brief Definitions of functions for doing numerical optimization
* and related operations.
*
* This file contains a number of functions that are useful for
* numerical optimization and maximum likelihood estimation. In
* addition, it contains some basic facilities for evaluating definite
* integrals.
*
* As is the case across Scythe, we provide both general and default
* template definitions for the functions in this file that return
* Matrix objects. The general definitions allow the user to
* customize the matrix_order and matrix_style of the returned Matrix,
* while the default versions return concrete matrices of the same
* matrix_order as the first (or only) Matrix argument to the
* function. In cases where we supply these two types of definitions,
* we explicitly document only the general version, although the
* default definition will typically appear in the function list
* below.
*
* \note
* Doxygen has some difficulty dealing with overloaded templates.
* Under certain circumstances it does not correctly process the
* definitions of default templates. In these cases, the definition
* for the default template will not even appear in the function list.
* We provide default templates for all of the Matrix-returning
* functions in this file.
*
*/
#ifndef SCYTHE_OPTIMIZE_H
#define SCYTHE_OPTIMIZE_H
#ifdef SCYTHE_COMPILE_DIRECT
#include "matrix.h"
#include "algorithm.h"
#include "error.h"
#include "rng.h"
#include "distributions.h"
#include "la.h"
#include "ide.h"
#include "smath.h"
#include "stat.h"
#else
#include "scythestat/matrix.h"
#include "scythestat/algorithm.h"
#include "scythestat/error.h"
#include "scythestat/rng.h"
#include "scythestat/distributions.h"
#include "scythestat/la.h"
#include "scythestat/ide.h"
#include "scythestat/smath.h"
#include "scythestat/stat.h"
#endif
/* We want to use an anonymous namespace to make the following consts
* and functions local to this file, but mingw doesn't play nice with
* anonymous namespaces so we do things differently when using the
* cross-compiler.
*/
#ifdef __MINGW32__
#define SCYTHE_MINGW32_STATIC static
#else
#define SCYTHE_MINGW32_STATIC
#endif
namespace scythe {
#ifndef __MINGW32__
namespace {
#endif
/* Functions (private to this file) that do very little... */
template <typename T, matrix_order O, matrix_style S>
SCYTHE_MINGW32_STATIC T donothing (const Matrix<T,O,S>& x)
{
return (T) 0.0;
}
template <typename T>
SCYTHE_MINGW32_STATIC T donothing (T& x)
{
return (T) 0.0;
}
#ifndef __MINGW32__
}
#endif
/* Return the machine epsilon
* Notes: Algorithm taken from Sedgewick, Robert. 1992. Algorithms
* in C++. Addison Wesley. pg. 561
*/
/*! \brief Compute the machine epsilon.
*
* The epsilon function returns the machine epsilon: the smallest
* number that, when summed with 1, produces a value greater than
* one.
*/
template <typename T>
T
epsilon()
{
T eps, del, neweps;
del = (T) 0.5;
eps = (T) 0.0;
neweps = (T) 1.0;
while ( del > 0 ) {
if ( 1 + neweps > 1 ) { /* Then the value might be too large */
eps = neweps; /* ...save the current value... */
neweps -= del; /* ...and decrement a bit */
} else { /* Then the value is too small */
neweps += del; /* ...so increment it */
}
del *= 0.5; /* Reduce the adjustment by half */
}
return eps;
}
/*! \brief Calculate the definite integral of a function from a to b.
*
* This function calculates the definite integral of a univariate
* function on the interval \f$[a,b]\f$.
*
* \param fun The function (or functor) whose definite integral is
* to be calculated. This function should both take and return a
* single argument of type T.
* \param a The starting value of the interval.
* \param b The ending value of the interval.
* \param N The number of subintervals to calculate. Increasing
* this number will improve the accuracy of the estimate but will
* also increase run-time.
*
* \throw scythe_invalid_arg (Level 1)
*
* \see adaptsimp(FUNCTOR fun, T a, T b, unsigned int& N, double tol = 1e-5)
* \note
* Users will typically wish to implement \a fun in terms of a
* functor. Using a functor provides a generic way in which to
* evaluate functions with more than one parameter. Furthermore,
* although one can pass a function pointer to this routine,
* the compiler cannot inline and fully optimize code
* referenced by function pointers.
*/
template <typename T, typename FUNCTOR>
T intsimp (FUNCTOR fun, T a, T b, unsigned int N)
{
SCYTHE_CHECK_10(a > b, scythe_invalid_arg,
"Lower limit larger than upper");
T I = (T) 0;
T w = (b - a) / N;
for (unsigned int i = 1; i <= N; ++i)
I += w * (fun(a +(i - 1) *w) + 4 * fun(a - w / 2 + i * w) +
fun(a + i * w)) / 6;
return I;
}
/*! \brief Calculate the definite integral of a function from a to b.
*
* This function calculates the definite integral of a univariate
* function on the interval \f$[a,b]\f$.
*
* \param fun The function (or functor) whose definite integral is
* to be calculated. This function should both take and return a
* single argument of type T.
* \param a The starting value of the interval.
* \param b The ending value of the interval.
* \param N The number of subintervals to calculate. Increasing
* this number will improve the accuracy of the estimate but will
* also increase run-time.
* \param tol The accuracy required. Both accuracy and run-time
* decrease as this number increases.
*
* \throw scythe_invalid_arg (Level 1)
*
* \see intsimp(FUNCTOR fun, T a, T b, unsigned int& N)
*
* \note
* Users will typically wish to implement \a fun in terms of a
* functor. Using a functor provides a generic way in which to
* evaluate functions with more than one parameter. Furthermore,
* although one can pass a function pointer to this routine,
* the compiler cannot inline and fully optimize code
* referenced by function pointers.
*/
template <typename T, typename FUNCTOR>
T adaptsimp(FUNCTOR fun, T a, T b, unsigned int N, double tol = 1e-5)
{
SCYTHE_CHECK_10(a > b, scythe_invalid_arg,
"Lower limit larger than upper");
T I = intsimp(fun, a, b, N);
if (std::fabs(I - intsimp(fun, a, b, N / 2)) > tol)
return adaptsimp(fun, a, (a + b) / 2, N, tol)
+ adaptsimp(fun, (a + b) / 2, b, N, tol);
return I;
}
/*! \brief Calculate gradient of a function using a forward
* difference formula.
*
* This function numerically calculates the gradient of a
* vector-valued function at \a theta using a forward difference
* formula.
*
* \param fun The function to calculate the gradient of. This
* function should both take and return a single Matrix (vector) of
* type T.
* \param theta The column vector of values at which to calculate
* the gradient of the function.
*
* \see gradfdifls(FUNCTOR fun, T alpha, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
* \see jacfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
* \see hesscdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
*
* \throw scythe_dimension_error (Level 1)
*
* \note
* Users will typically wish to implement \a fun in terms of a
* functor. Using a functor provides a generic way in which to
* evaluate functions with more than one parameter. Furthermore,
* although one can pass a function pointer to this routine,
* the compiler cannot inline and fully optimize code
* referenced by function pointers.
*/
template <matrix_order RO, matrix_style RS, typename T,
matrix_order PO, matrix_style PS, typename FUNCTOR>
Matrix<T, RO, RS>
gradfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
{
SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
"Theta not column vector");
unsigned int k = theta.size();
T h = std::sqrt(epsilon<T>());
h = std::sqrt(h);
Matrix<T,RO,RS> grad(k, 1);
Matrix<T,RO> e;
Matrix<T,RO> temp;
for (unsigned int i = 0; i < k; ++i) {
e = Matrix<T,RO>(k, 1);
e[i] = h;
temp = theta + e;
donothing(temp); // XXX I don't understand this
e = temp - theta;
grad[i] = (fun(theta + e) - fun(theta)) / e[i];
}
return grad;
}
// Default template version
template <typename T, matrix_order O, matrix_style S,
typename FUNCTOR>
Matrix<T, O, Concrete>
gradfdif (FUNCTOR fun, const Matrix<T,O,S>& theta)
{
return gradfdif<O,Concrete>(fun, theta);
}
/*! \brief Calculate the first derivative of the function using
* a forward difference formula.
*
* This function numerically calculates the first derivative of a
* function with respect to \a alpha at \f$theta + alpha \cdot p\f$
* using a forward difference formula. This function is primarily
* useful for linesearches.
*
* \param fun The function to calculate the first derivative of.
* This function should take a single Matrix<T> argument and return
* a value of type T.
* \param alpha Double the step length.
* \param theta A Matrix (vector) of parameter values at which to
* calculate the gradient.
* \param p A direction vector.
*
* \see gradfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
* \see jacfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
* \see hesscdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
*
* \throw scythe_dimension_error (Level 1)
*
* \note
* Users will typically wish to implement \a fun in terms of a
* functor. Using a functor provides a generic way in which to
* evaluate functions with more than one parameter. Furthermore,
* although one can pass a function pointer to this routine,
* the compiler cannot inline and fully optimize code
* referenced by function pointers.
*/
template <typename T, matrix_order PO1, matrix_style PS1,
matrix_order PO2, matrix_style PS2, typename FUNCTOR>
T
gradfdifls (FUNCTOR fun, T alpha, const Matrix<T,PO1,PS1>& theta,
const Matrix<T,PO2,PS2>& p)
{
SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
"Theta not column vector");
SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
"p not column vector");
unsigned int k = theta.size();
T h = std::sqrt(epsilon<T>());
h = std::sqrt(h);
//T h = std::sqrt(2.2e-16);
T deriv;
for (unsigned int i = 0; i < k; ++i) {
T temp = alpha + h;
donothing(temp);
T e = temp - alpha;
deriv = (fun(theta + (alpha + e) * p) - fun(theta + alpha * p))
/ e;
}
return deriv;
}
/*! \brief Calculate the Jacobian of a function using a forward
* difference formula.
*
* This function numerically calculates the Jacobian of a
* vector-valued function using a forward difference formula.
*
* \param fun The function to calculate the Jacobian of. This
* function should both take and return a Matrix (vector) of type
* T.
* \param theta The column vector of parameter values at which to
* take the Jacobian of \a fun.
*
* \see gradfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
* \see gradfdifls(FUNCTOR fun, T alpha, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
* \see hesscdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
*
* \throw scythe_dimension_error (Level 1)
*
* \note
* Users will typically wish to implement \a fun in terms of a
* functor. Using a functor provides a generic way in which to
* evaluate functions with more than one parameter. Furthermore,
* although one can pass a function pointer to this routine,
* the compiler cannot inline and fully optimize code
* referenced by function pointers.
*/
template <matrix_order RO, matrix_style RS, typename T,
matrix_order PO, matrix_style PS, typename FUNCTOR>
Matrix<T,RO,RS>
jacfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
{
SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
"Theta not column vector");
Matrix<T,RO> fval = fun(theta);
unsigned int k = theta.rows();
unsigned int n = fval.rows();
T h = std::sqrt(epsilon<T>()); //2.2e-16
h = std::sqrt(h);
Matrix<T,RO,RS> J(n,k);
Matrix<T,RO> e;
Matrix<T,RO> temp;
Matrix<T,RO> fthetae;
Matrix<T,RO> ftheta;
for (int i = 0; i < k; ++i) {
e = Matrix<T,RO>(k,1);
e[i] = h;
temp = theta + e;
donothing(temp); /// XXX ??
e = temp - theta;
fthetae = fun(theta + e);
ftheta = fun(theta);
for (unsigned int j = 0; j < n; ++j) {
J(j,i) = (fthetae[j] - ftheta[j]) / e[i];
}
}
return J;
}
// default template
template <typename T, matrix_order PO, matrix_style PS,
typename FUNCTOR>
Matrix<T,PO,PS>
jacfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
{
return jacfdif<PO,Concrete>(fun, theta);
}
/*! \brief Calculate the Hessian of a function using a central
* difference formula.
*
* This function numerically calculates the Hessian of a
* vector-valued function using a central difference formula.
*
* \param fun The function to calculate the Hessian of. This
* function should take a Matrix (vector) of type T and return a
* single value of type T.
* \param theta The column vector of parameter values at which to
* calculate the Hessian.
*
* \see gradfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
* \see gradfdifls(FUNCTOR fun, T alpha, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
* \see jacfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
*
* \throw scythe_dimension_error
*
* \note
* Users will typically wish to implement \a fun in terms of a
* functor. Using a functor provides a generic way in which to
* evaluate functions with more than one parameter. Furthermore,
* although one can pass a function pointer to this routine,
* the compiler cannot inline and fully optimize code
* referenced by function pointers.
*/
template <matrix_order RO, matrix_style RS, typename T,
matrix_order PO, matrix_style PS, typename FUNCTOR>
Matrix<T, RO, RS>
hesscdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
{
SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
"Theta not column vector");
T fval = fun(theta);
//std::cout << std::endl;
//std::cout << "hesscdif theta = " << theta << "\n";
//std::cout << "hesscdif fun(theta) = " << fval << std::endl;
unsigned int k = theta.rows();
// stepsize CAREFUL -- THIS IS MACHINE SPECIFIC !!!!
T h2 = std::sqrt(epsilon<T>());
//T h2 = (T) 1e-10;
T h = std::sqrt(h2);
Matrix<T, RO, RS> H(k,k);
//std::cout << "h2 = " << h2 << " h = " << h << std::endl;
Matrix<T,RO> ei;
Matrix<T,RO> ej;
Matrix<T,RO> temp;
for (unsigned int i = 0; i < k; ++i) {
ei = Matrix<T,RO>(k, 1);
ei[i] = h;
temp = theta + ei;
donothing(temp); // XXX Again, I'm baffled
ei = temp - theta;
for (unsigned int j = 0; j < k; ++j){
ej = Matrix<T,RO>(k,1);
ej[j] = h;
temp = theta + ej;
donothing(temp); // XXX and again
ej = temp - theta;
if (i == j) {
H(i,i) = ( -fun(theta + 2.0 * ei) + 16.0 *
fun(theta + ei) - 30.0 * fval + 16.0 *
fun(theta - ei) -
fun(theta - 2.0 * ei)) / (12.0 * h2);
} else {
H(i,j) = ( fun(theta + ei + ej) - fun(theta + ei - ej)
- fun(theta - ei + ej) + fun(theta - ei - ej))
/ (4.0 * h2);
}
}
}
//std::cout << "end of hesscdif, H = " << H << "\n";
return H;
}
// default template
template <typename T, matrix_order PO, matrix_style PS,
typename FUNCTOR>
Matrix<T,PO,PS>
hesscdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
{
return hesscdif<PO,Concrete>(fun, theta);
}
/*! \brief Find the step length that minimizes an implied 1-dimensional function.
*
* This function performs a line search to find the step length
* that approximately minimizes an implied one dimensional
* function.
*
* \param fun The function to minimize. This function should take
* one Matrix (vector) argument of type T and return a single value
* of type T.
* \param theta A column vector of parameter values that anchor the
* 1-dimensional function.
* \param p A direction vector that creates the 1-dimensional
* function.
*
* \see linesearch2(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p, rng<RNGTYPE>& runif)
* \see zoom(FUNCTOR fun, T alpha_lo, T alpha_hi, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
* \see BFGS(FUNCTOR fun, const Matrix<T,PO,PS>& theta, rng<RNGTYPE>& runif, unsigned int maxit, T tolerance, bool trace = false)
*
* \throw scythe_dimension_error (Level 1)
*
* \note
* Users will typically wish to implement \a fun in terms of a
* functor. Using a functor provides a generic way in which to
* evaluate functions with more than one parameter. Furthermore,
* although one can pass a function pointer to this routine,
* the compiler cannot inline and fully optimize code
* referenced by function pointers.
*/
template <typename T, matrix_order PO1, matrix_style PS1,
matrix_order PO2, matrix_style PS2, typename FUNCTOR>
T linesearch1 (FUNCTOR fun, const Matrix<T,PO1,PS1>& theta,
const Matrix<T,PO2,PS2>& p)
{
SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
"Theta not column vector");
SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
"p not column vector");
T alpha_bar = (T) 1.0;
T rho = (T) 0.9;
T c = (T) 0.5;
T alpha = alpha_bar;
Matrix<T,PO1> fgrad = gradfdif(fun, theta);
while (fun(theta + alpha * p) >
(fun(theta) + c * alpha * t(fgrad) * p)[0]) {
alpha = rho * alpha;
}
return alpha;
}
/*! \brief Find the step length that minimizes an implied 1-dimensional function.
*
* This function performs a line search to find the step length
* that approximately minimizes an implied one dimensional
* function.
*
* \param fun The function to minimize. This function should take
* one Matrix (vector) argument of type T and return a single value
* of type T.
* \param theta A column vector of parameter values that anchor the
* 1-dimensional function.
* \param p A direction vector that creates the 1-dimensional
* function.
* \param runif A random uniform number generator function object
* (an object that returns a random uniform variate on (0,1) when
* its () operator is invoked).
*
* \see linesearch1(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
* \see zoom(FUNCTOR fun, T alpha_lo, T alpha_hi, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
* \see BFGS(FUNCTOR fun, const Matrix<T,PO,PS>& theta, rng<RNGTYPE>& runif, unsigned int maxit, T tolerance, bool trace = false)
*
* \throw scythe_dimension_error (Level 1)
*
* \note
* Users will typically wish to implement \a fun in terms of a
* functor. Using a functor provides a generic way in which to
* evaluate functions with more than one parameter. Furthermore,
* although one can pass a function pointer to this routine,
* the compiler cannot inline and fully optimize code
* referenced by function pointers.
*/
template <typename T, matrix_order PO1, matrix_style PS1,
matrix_order PO2, matrix_style PS2, typename FUNCTOR,
typename RNGTYPE>
T linesearch2 (FUNCTOR fun, const Matrix<T,PO1,PS1>& theta,
const Matrix<T,PO2,PS2>& p, rng<RNGTYPE>& runif)
{
SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
"Theta not column vector");
SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
"p not column vector");
T alpha_last = (T) 0.0;
T alpha_cur = (T) 1.0;
T alpha_max = (T) 10.0;
T c1 = (T) 1e-4;
T c2 = (T) 0.5;
unsigned int max_iter = 50;
T fgradalpha0 = gradfdifls(fun, (T) 0, theta, p);
for (unsigned int i = 0; i < max_iter; ++i) {
T phi_cur = fun(theta + alpha_cur * p);
T phi_last = fun(theta + alpha_last * p);
if ((phi_cur > (fun(theta) + c1 * alpha_cur * fgradalpha0))
|| ((phi_cur >= phi_last) && (i > 0))) {
T alphastar = zoom(fun, alpha_last, alpha_cur, theta, p);
return alphastar;
}
T fgradalpha_cur = gradfdifls(fun, alpha_cur, theta, p);
if (std::fabs(fgradalpha_cur) <= -1 * c2 * fgradalpha0)
return alpha_cur;
if ( fgradalpha_cur >= (T) 0.0) {
T alphastar = zoom(fun, alpha_cur, alpha_last, theta, p);
return alphastar;
}
alpha_last = alpha_cur;
// runif stuff below is probably not correc KQ 12/08/2006
// I think it should work now DBP 01/02/2007
alpha_cur = runif() * (alpha_max - alpha_cur) + alpha_cur;
}
return 0.001;
}
/*! \brief Find minimum of a function once bracketed.
*
* This function finds the minimum of a function, once bracketed.
*
* \param fun The function to minimize. This function should take
* one Matrix (vector) argument of type T and return a single value
* of type T.
* \param alpha_lo The lower bracket.
* \param alpha_hi The upper bracket.
* \param theta A column vector of parameter values that anchor the
* 1-dimensional function.
* \param p A direction vector that creates the 1-dimensional
*
* \see linesearch1(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
* \see linesearch2(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p, rng<RNGTYPE>& runif)
* \see BFGS(FUNCTOR fun, const Matrix<T,PO,PS>& theta, rng<RNGTYPE>& runif, unsigned int maxit, T tolerance, bool trace = false)
*
* \throw scythe_dimension_error (Level 1)
*
* \note
* Users will typically wish to implement \a fun in terms of a
* functor. Using a functor provides a generic way in which to
* evaluate functions with more than one parameter. Furthermore,
* although one can pass a function pointer to this routine,
* the compiler cannot inline and fully optimize code
* referenced by function pointers.
*
*/
template <typename T, matrix_order PO1, matrix_style PS1,
matrix_order PO2, matrix_style PS2, typename FUNCTOR>
T zoom (FUNCTOR fun, T alpha_lo, T alpha_hi,
const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
{
SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
"Theta not column vector");
SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
"p not column vector");
T alpha_j = (alpha_lo + alpha_hi) / 2.0;
T phi_0 = fun(theta);
T c1 = (T) 1e-4;
T c2 = (T) 0.5;
T fgrad0 = gradfdifls(fun, (T) 0, theta, p);
unsigned int count = 0;
unsigned int maxit = 20;
while(count < maxit) {
T phi_j = fun(theta + alpha_j * p);
T phi_lo = fun(theta + alpha_lo * p);
if ((phi_j > (phi_0 + c1 * alpha_j * fgrad0))
|| (phi_j >= phi_lo)){
alpha_hi = alpha_j;
} else {
T fgradj = gradfdifls(fun, alpha_j, theta, p);
if (std::fabs(fgradj) <= -1 * c2 * fgrad0){
return alpha_j;
}
if ( fgradj * (alpha_hi - alpha_lo) >= 0){
alpha_hi = alpha_lo;
}
alpha_lo = alpha_j;
}
++count;
}
return alpha_j;
}
/*! \brief Find function minimum using the BFGS algorithm.
*
* Numerically find the minimum of a function using the BFGS
* algorithm.
*
* \param fun The function to minimize. This function should take
* one Matrix (vector) argument of type T and return a single value
* of type T.
* \param theta A column vector of parameter values that anchor the
* 1-dimensional function.
* \param runif A random uniform number generator function object
* (an object that returns a random uniform variate on (0,1) when
* its () operator is invoked).
* \param maxit The maximum number of iterations.
* \param tolerance The convergence tolerance.
* \param trace Boolean value determining whether BFGS should print
* to stdout (defaults to false).
*
* \see linesearch1(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
* \see linesearch2(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p, rng<RNGTYPE>& runif)
* \see zoom(FUNCTOR fun, T alpha_lo, T alpha_hi, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
*
* \throw scythe_dimension_error (Level 1)
* \throw scythe_convergence_error (Level 0)
*
* \note
* Users will typically wish to implement \a fun in terms of a
* functor. Using a functor provides a generic way in which to
* evaluate functions with more than one parameter. Furthermore,
* although one can pass a function pointer to this routine,
* the compiler cannot inline and fully optimize code
* referenced by function pointers.
*/
// there were 2 versions of linesearch1-- the latter was what we
// had been calling linesearch2
template <matrix_order RO, matrix_style RS, typename T,
matrix_order PO, matrix_style PS,
typename FUNCTOR, typename RNGTYPE>
Matrix<T,RO,RS>
BFGS (FUNCTOR fun, const Matrix<T,PO,PS>& theta, rng<RNGTYPE>& runif,
unsigned int maxit, T tolerance, bool trace = false)
{
SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
"Theta not column vector");
unsigned int n = theta.size();
// H is initial inverse hessian
Matrix<T,RO> H = inv(hesscdif(fun, theta));
// gradient at starting values
Matrix<T,RO> fgrad = gradfdif(fun, theta);
Matrix<T,RO> thetamin = theta;
Matrix<T,RO> fgrad_new = fgrad;
Matrix<T,RO> I = eye<T,RO>(n);
Matrix<T,RO> s;
Matrix<T,RO> y;
unsigned int count = 0;
while( (t(fgrad_new)*fgrad_new)[0] > tolerance) {
Matrix<T> p = -1.0 * H * fgrad;
//std::cout << "initial H * fgrad = " << H * fgrad << "\n";
//std::cout << "initial p = " << p << "\n";
T alpha = linesearch2(fun, thetamin, p, runif);
//T alpha = linesearch1(fun, thetamin, p);
//std::cout << "after linesearch p = " << p << "\n";
Matrix<T> thetamin_new = thetamin + alpha * p;
fgrad_new = gradfdif(fun, thetamin_new);
s = thetamin_new - thetamin;
y = fgrad_new - fgrad;
T rho = 1.0 / (t(y) * s)[0];
H = (I - rho * s * t(y)) * H *(I - rho * y * t(s))
+ rho * s * t(s);
thetamin = thetamin_new;
fgrad = fgrad_new;
++count;
if (trace){
std::cout << "BFGS iteration = " << count << std::endl;
std::cout << "thetamin = " << (t(thetamin)) ;
std::cout << "gradient = " << (t(fgrad)) ;
std::cout << "t(gradient) * gradient = " <<
(t(fgrad) * fgrad) ;
std::cout << "function value = " << fun(thetamin) <<
std::endl << std::endl;
}
//std::cout << "Hessian = " << hesscdif(fun, theta) << "\n";
//std::cout << "H = " << H << "\n";
//std::cout << "alpha = " << alpha << std::endl;
//std::cout << "p = " << p << "\n";
//std::cout << "-1 * H * fgrad = " << -1.0 * H * fgrad << "\n";
SCYTHE_CHECK(count > maxit, scythe_convergence_error,
"Failed to converge. Try better starting values");
}
return thetamin;
}
// Default template
template <typename T, matrix_order O, matrix_style S,
typename FUNCTOR, typename RNGTYPE>
Matrix<T,O,Concrete>
BFGS (FUNCTOR fun, const Matrix<T,O,S>& theta, rng<RNGTYPE>& runif,
unsigned int maxit, T tolerance, bool trace = false)
{
return BFGS<O,Concrete> (fun, theta, runif, maxit, tolerance, trace);
}
/* Solves a system of n nonlinear equations in n unknowns of the form
* fun(thetastar) = 0 for thetastar given the function, starting
* value theta, max number of iterations, and tolerance.
* Uses Broyden's method.
*/
/*! \brief Solve a system of nonlinear equations.
*
* Solves a system of n nonlinear equations in n unknowns of the form
* \f$fun(\theta^*) = 0\f$ for \f$\theta^*\f$.
*
* \param fun The function to solve. The function should both take
* and return a Matrix of type T.
* \param theta A column vector of parameter values at which to
* start the solve procedure.
* \param maxit The maximum number of iterations.
* \param tolerance The convergence tolerance.
*
* \throw scythe_dimension_error (Level 1)
* \throw scythe_convergence_error (Level 1)
*
* \note
* Users will typically wish to implement \a fun in terms of a
* functor. Using a functor provides a generic way in which to
* evaluate functions with more than one parameter. Furthermore,
* although one can pass a function pointer to this routine,
* the compiler cannot inline and fully optimize code
* referenced by function pointers.
*/
template <matrix_order RO, matrix_style RS, typename T,
matrix_order PO, matrix_style PS, typename FUNCTOR>
Matrix<T,RO,RS>
nls_broyden(FUNCTOR fun, const Matrix<T,PO,PS>& theta,
unsigned int maxit = 5000, T tolerance = 1e-6)
{
SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
"Theta not column vector");
Matrix<T,RO> thetastar = theta;
Matrix<T,RO> B = jacfdif(fun, thetastar);
Matrix<T,RO> fthetastar;
Matrix<T,RO> p;
Matrix<T,RO> thetastar_new;
Matrix<T,RO> fthetastar_new;
Matrix<T,RO> s;
Matrix<T,RO> y;
for (unsigned int i = 0; i < maxit; ++i) {
fthetastar = fun(thetastar);
p = lu_solve(B, -1 * fthetastar);
T alpha = (T) 1.0;
thetastar_new = thetastar + alpha*p;
fthetastar_new = fun(thetastar_new);
s = thetastar_new - thetastar;
y = fthetastar_new - fthetastar;
B = B + ((y - B * s) * t(s)) / (t(s) * s);
thetastar = thetastar_new;
if (max(fabs(fthetastar_new)) < tolerance)
return thetastar;
}
SCYTHE_THROW_10(scythe_convergence_error, "Failed to converge. Try better starting values or increase maxit");
return thetastar;
}
// default template
template <typename T, matrix_order O, matrix_style S,
typename FUNCTOR>
Matrix<T,O,Concrete>
nls_broyden (FUNCTOR fun, const Matrix<T,O,S>& theta,
unsigned int maxit = 5000, T tolerance = 1e-6)
{
return nls_broyden<O,Concrete>(fun, theta, maxit, tolerance);
}
} // namespace scythe
#endif /* SCYTHE_OPTIMIZE_H */
|