1 #ifndef TIL_MINTOOLS_TPP
2 #define TIL_MINTOOLS_TPP
8 //---------------------------------------------------------------------------
10 /// A class to find a bracketing triplet around a function minimum.
11 template < typename TFunctor >
12 void Mnbrak<TFunctor>::operator()(input_prec & ax, input_prec & bx, input_prec & cx, input_type point, input_type dir)
14 if (m_verbose) std::cout << "[ BEGIN Bracketing ]" << std::endl;
17 // check that input dimension are equal
18 assert(size(point) == size(dir));
19 // resize buffer accordingly
20 resize(m_buf, size(point));
22 input_prec q, r, u, ulim;
25 detail::loop_xxx(*_1 = *_2 + ax * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
26 if (m_verbose) std::cout << "lambda = " << ax << std::endl;
27 m_fa = m_functor(m_buf);
29 //m_fa = m_functor(point + ax * dir);
30 detail::loop_xxx(*_1 = *_2 + bx * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
31 if (m_verbose) std::cout << "lambda = " << bx << std::endl;
32 m_fb = m_functor(m_buf);
33 //m_fb = m_functor(point + bx * dir);
35 // swap A and B so that we go downhill from A to B
39 std::swap(m_fa, m_fb);
42 cx = bx + GOLD * (bx - ax);
44 detail::loop_xxx(*_1 = *_2 + cx * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
45 if (m_verbose) std::cout << "lambda = " << cx << std::endl;
46 m_fc = m_functor(m_buf);
47 //m_fc = m_functor(point + cx * dir);
48 // Keep estimating until we have a bracketing triplet
51 r = ( bx - ax ) * ( m_fb - m_fc );
52 q = ( bx - cx ) * ( m_fb - m_fa );
53 // parabolic fit estimate
54 u = bx - ((bx-cx)*q - (bx-ax)*r) / (2.0 * sign(max(std::abs(q-r), EPSILON), q-r));
55 ulim = bx + GLIMIT * ( cx - bx );
56 // Check whether u is between b and c
57 if (same_sign(bx-u, u-cx))
59 detail::loop_xxx(*_1 = *_2 + u * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
60 if (m_verbose) std::cout << "lambda = " << u << std::endl;
61 fu = m_functor(m_buf);
62 //fu = m_functor(point + u * dir);
63 // If the value is smaller than fc, then [b,u,c] is a bracket
66 // rename [b u] as [a b], so that we return bracket [a b c]
67 shift_right(u, bx, ax);
68 shift_right(fu, m_fb, m_fa);
69 if (m_verbose) std::cout << ax << " " << bx << " " << cx << std::endl;
70 if (m_verbose) std::cout << "[ END Bracketing ]" << std::endl;
73 // If the value is bigger than fb, then [a,b,u] is a bracket
74 // NB: This probably happens very seldom, since u is the minimum parabolic fit.
77 // rename u as b so that we return bracket [a b c ]
80 if (m_verbose) std::cout << ax << " " << bx << " " << cx << std::endl;
81 if (m_verbose) std::cout << "[ END Bracketing ]" << std::endl;
85 // NB: in all other cases, it will be assumed that [b c u] will be our next triplet,
86 // be it a bracket or not, therefore we don't have special treatments and returns.
88 // If parabolic fit did not bracket, do a golden ratio [b - c -- *]
89 u = cx + GOLD * (cx - bx);
90 detail::loop_xxx(*_1 = *_2 + u * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
91 if (m_verbose) std::cout << "lambda = " << u << std::endl;
92 fu = m_functor(m_buf);
93 //fu = m_functor(point + u * dir);
95 // Check that u, the parabolic trial, is outside c but still before ulim
96 else if (same_sign(cx-u, u-ulim))
98 detail::loop_xxx(*_1 = *_2 + u * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
99 if (m_verbose) std::cout << "lambda = " << u << std::endl;
100 fu = m_functor(m_buf);
101 //fu = m_functor(point + u * dir);
102 // if fu < fc, do a golden ration [c - u -- *], and get rid of b.
105 shift_right(u + GOLD * (u-cx), u, cx, bx);
106 detail::loop_xxx(*_1 = *_2 + u * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
107 if (m_verbose) std::cout << "lambda = " << u << std::endl;
108 shift_right(m_functor(m_buf), fu, m_fc, m_fb);
109 //shift_right(m_functor(point + u * dir), fu, m_fc, m_fb);
111 // (else, we have a bracket)
113 // Check that u is beyond ulim and cx
114 else if (same_sign(u - ulim, ulim - cx))
117 detail::loop_xxx(*_1 = *_2 + u * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
118 if (m_verbose) std::cout << "lambda = " << u << std::endl;
119 fu = m_functor(m_buf);
120 //fu = m_functor(point + u * dir);
122 // If we are here, it means that u is before b, which does not make sense:
123 // do a golden search.
126 u = cx + GOLD*(cx-bx);
127 detail::loop_xxx(*_1 = *_2 + u * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
128 if (m_verbose) std::cout << "lambda = " << u << std::endl;
129 fu = m_functor(m_buf);
130 //fu = m_functor(point + u * dir);
133 // rename [b c u] into [a b c]
135 shift_right(u, cx, bx, ax);
136 shift_right(fu, m_fc, m_fb, m_fa);
138 if (m_verbose) std::cout << ax << " " << bx << " " << cx << std::endl;
139 if (m_verbose) std::cout << "[ END Bracketing ]" << std::endl;
142 //---------------------------------------------------------------------------
144 template < typename TFunctor >
145 const typename Mnbrak<TFunctor>::input_prec Mnbrak<TFunctor>::GOLD = (std::sqrt(5.0) + 1.0) / 2.0;
146 template < typename TFunctor >
147 const typename Mnbrak<TFunctor>::input_prec Mnbrak<TFunctor>::EPSILON = 128 * std::numeric_limits<input_prec>::epsilon();
148 template < typename TFunctor >
149 const typename Mnbrak<TFunctor>::input_prec Mnbrak<TFunctor>::GLIMIT = 100;
151 //---------------------------------------------------------------------------
153 template < typename TFunctor >
154 Brent<TFunctor>::Brent()
164 //---------------------------------------------------------------------------
166 template < typename TFunctor >
167 Brent<TFunctor>::Brent(TFunctor functor)
177 //---------------------------------------------------------------------------
179 /// A class to minimize a functional within a bracketing triplet.
180 template < typename TFunctor >
181 void Brent<TFunctor>::operator()
190 if (m_verbose) std::cout << "[ BEGIN Parabolic linemin ]" << std::endl;
191 using namespace expr;
192 typedef typename combine<input_prec, output_prec>::type max_prec;
193 resize(m_buf, size(point));
194 // The last and before last estimates of the minimum
196 // Their corresponding function values
197 output_prec fw, fv, fu;
198 // The center of [a, b]
200 // Distance moved on the last step last
201 input_prec stepLength = 0.0;
202 // Distance moved on the step before last
203 input_prec stepLength2 = 0.0;
205 max_prec r, p, q, tmp;
207 // Swap bracket if necessary
208 if (a > b) std::swap(a,b);
209 // Initialize all estimated minima at the bracket point m
211 detail::loop_xxx(*_1 = *_2 + m_xmin * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
212 if (m_verbose) std::cout << "lambda = " << m_xmin << std::endl;
213 fw = fv = m_fmin = m_functor(m_buf);
214 //fw = fv = m_fmin = m_functor(point + m_xmin * dir);
216 // main minimization loop
217 for (int iter = 0; iter < m_itmax; ++iter)
219 x_med = 0.5 * ( a + b );
220 //tol1 = m_tol * std::abs(m_xmin) + ZEPS;
224 //if (std::abs(m_xmin-x_med) <= tol2)
225 if (std::abs(m_xmin-x_med) <= (tol2-input_prec(0.5)*(b-a)))
227 if (m_verbose) std::cout << "[ END Parabolic linemin ]" << std::endl;
232 if (std::abs(stepLength2) > tol1)
234 r = (m_xmin-w) * (m_fmin-fv);
235 q = (m_xmin-v) * (m_fmin-fw);
236 p = (m_xmin-v) * q - (m_xmin-w)*r;
237 q = max_prec(2.0) * (q-r);
241 stepLength2 = stepLength;
242 // Is parabolic fit acceptable?
243 if (std::abs(p) >= std::abs(max_prec(0.5)*q*tmp) || p <= q * (a-m_xmin) || p >= q * (b-m_xmin))
245 // If not, take golden section step instead
246 //stepLength2 = ( m_xmin >= x_med ? a - m_xmin : b - m_xmin );
247 //stepLength = CGOLD * stepLength2;
248 stepLength = CGOLD * ( m_xmin >= x_med ? a - m_xmin : b - m_xmin );
252 // Accept parabolic fit
255 u = m_xmin + stepLength;
256 if (u-a < tol2 || b-u < tol2)
257 stepLength = SIGN(tol1, x_med-m_xmin);
263 //stepLength2 = ( m_xmin >= x_med ? a - m_xmin : b - m_xmin );
264 //stepLength = CGOLD * stepLength2;
265 stepLength = CGOLD * ( m_xmin >= x_med ? a - m_xmin : b - m_xmin );
268 u = (std::abs(stepLength) >= tol1 ? m_xmin+stepLength : m_xmin+sign(tol1,stepLength));
269 // This is the one function evaluation per iteration
270 detail::loop_xxx(*_1 = *_2 + u * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
271 if (m_verbose) std::cout << "lambda = " << u << std::endl;
272 fu = m_functor(m_buf);
273 //fu = m_functor(point + u * dir);
274 // Now decide what to do with out function evaluation
277 if (u >= m_xmin) a = m_xmin; else b = m_xmin;
278 shift_right(u, m_xmin, w, v);
279 shift_right(fu, m_fmin, fw, fv);
283 if (u < m_xmin) a = u; else b = u;
284 if (fu <= fw || w == m_xmin)
287 shift_right(fu,fw,fv);
289 else if (fu <= fv || v == m_xmin || v == w)
296 std::cerr << "Warning: Brent reached maximum iteration limit of " << m_itmax << std::endl;
297 if (m_verbose) std::cout << "[ END Parabolic linemin ]" << std::endl;
301 //---------------------------------------------------------------------------
303 template < typename TFunctor >
304 const typename Brent<TFunctor>::input_prec Brent<TFunctor>::CGOLD = (3.0 - std::sqrt(5.0)) / 2.0;
306 //---------------------------------------------------------------------------
308 template < typename TFunctor >
309 typename LineMin<TFunctor>::output_prec
310 LineMin<TFunctor>::operator()
313 const input_type & dir
316 using namespace expr;
320 m_mnbrak(a, m, b, p, dir);
321 m_brent(a, m, b, p, dir);
322 if (m_verbose) std::cout << "linemin: " << m_brent.xmin() << " " << m_brent.fmin() << std::endl;
323 detail::loop_xx(*_1 += *_2 * m_brent.xmin(), p.begin(), p.end(), dir.begin());
324 //p += dir * m_brent.xmin();
325 return m_brent.fmin();
329 //---------------------------------------------------------------------------
331 template < typename TFunctor, typename TLineMin >
332 typename Powell<TFunctor, TLineMin>::input_type Powell<TFunctor, TLineMin>::operator()(input_type p)
334 using namespace expr;
336 const output_prec EPSILON = 128 * std::numeric_limits<output_prec>::epsilon();
338 // function value at point p, before line min
340 output_prec delta, fpext, tmp;
342 input_type pt, pext, xt, dir;
344 resize(pext, size(p));
346 resize(dir, size(p));
348 resize(m_pmin, size(p));
350 typename std::vector<input_type>::iterator iDirBig;
353 std::copy(p.begin(), p.end(), m_pmin.begin());
355 m_fmin = this->functor()(m_pmin);
357 int n = size(m_pmin);
359 this->initDirections(n);
364 for (this->nIter() = 0; this->nIter() < this->maxIter(); ++this->nIter())
366 //std::cout << "Iteration " << m_niter << " start from " << m_fmin << " : " << m_pmin << std::endl;
367 std::cout << "Iteration " << this->nIter() << " start from " << m_fmin << std::endl;
369 // save information from previous iteration
373 // minimize over all directions
374 this->minOverAllDirections(iDirBig, delta);
376 // Stopping criterion:
377 // If we didn't minimize that much, return
378 // if (fp - m_fmin < ftol) return m_pmin;
379 if (2.0*(fp-m_fmin) <= this->ftol() * (std::abs(fp) + std::abs(m_fmin)) + EPSILON)
384 // Get information at an extrapolated point, twice as far as m_pmin from previous position.
385 // This will help us to decide whether we want to update the set of directions or not
386 detail::loop_xxx(*_1 = input_prec(2) * *_2 - *_3, pext.begin(), pext.end(), m_pmin.begin(), pt.begin());
387 //pext = input_prec(2) * m_pmin - pt;
388 fpext = this->functor()(pext);
390 // Update directions only if minimization along the best direction is not "played out".
393 // another magic criterion, that detect either if the biggest direction indeed contributed
394 // much more than the others, or that we are near the bottom of the parabole in this
396 tmp = output_prec(2) * ( fp - output_prec(2) * m_fmin + fpext ) * square( fp - m_fmin - delta ) - delta * square( fp - fpext );
399 detail::loop_xxx( *_1 = *_2 - *_3, dir.begin(), dir.end(), m_pmin.begin(), pt.begin());
401 m_fmin = m_linemin(m_pmin, dir);
403 m_dirs[ibig] = m_dirs[n];
407 //m_dirs[ibig] = dir;
412 std::cerr << "Warning: Powell reached maximum iteration limit of " << this->maxIter() << std::endl;
416 //---------------------------------------------------------------------------
418 template < typename TFunctor, typename TLineMin >
419 void Powell<TFunctor, TLineMin>::initDirections(std::size_t n)
422 if (m_initStd.size())
424 for (std::size_t i = 0; i != n; ++i)
426 resize(m_dirs[i], n);
427 std::fill(m_dirs[i].begin(), m_dirs[i].end(), input_prec(0));
428 m_dirs[i][i] = m_initStd[i];
433 for (std::size_t i = 0; i != n; ++i)
435 resize(m_dirs[i], n);
436 std::fill(m_dirs[i].begin(), m_dirs[i].end(), input_prec(0));
437 m_dirs[i][i] = input_prec(1);
442 //---------------------------------------------------------------------------
444 template < typename TFunctor, typename TLineMin >
445 void Powell<TFunctor, TLineMin>::minOverAllDirections(typename std::vector<input_type>::iterator & iDirBig, output_prec & delta)
447 delta = output_prec(-1);
450 for (typename std::vector<input_type>::iterator iDir = m_dirs.begin(); iDir != m_dirs.end() ; ++iDir)
453 m_fmin = m_linemin(m_pmin, *iDir);
454 //std::cout << "dirmin " << count++ << " " << m_fmin << " : " << m_pmin << std::endl;
455 std::cout << "dirmin " << count++ << " " << m_fmin << std::endl;
456 // Keep this direction if it was the biggest decrease
457 if (fpext - m_fmin > delta)
459 delta = fpext - m_fmin;
466 //---------------------------------------------------------------------------
468 // TODO: rewrite all these iterative minimization as steps only, let the user loop himself, that
469 // allows for incredible flexibility, for printing, debugging, whatever.
470 template < typename TFunctor, typename TGradFunctor, typename TLineMin >
471 typename PRConjugateGradient<TFunctor, TGradFunctor, TLineMin>::input_type
472 PRConjugateGradient<TFunctor, TGradFunctor, TLineMin>::operator()(input_type p)
474 const output_prec EPSILON = 128 * std::numeric_limits<output_prec>::epsilon();
476 std::size_t n = p.size();
478 // We use the function version of resize, and not the member function, because input_type might
479 // not have this member function.
481 resize(m_conjgrad, n);
483 // temporary variable to hold gradient
484 // TODO: right now, gradtmp is not allocated, this task is left to the gradient
485 // algo. I think it sucks.
487 output_prec gg, dgg, gam;
492 fp = this->functor()(p);
493 this->dfunctor()(p, m_grad);
494 std::for_each(m_grad.begin(), m_grad.end(), std::negate<input_prec>());
495 std::copy(m_grad.begin(), m_grad.end(), m_conjgrad.begin());
497 for (this->nIter() = 0; this->nIter() < this->maxIter(); ++this->nIter())
499 std::cout << "CG iter " << this->nIter();
501 //std::copy(p.begin(), p.end(), std::ostream_iterator<double>(std::cout, " "));
502 std::cout << std::endl;
504 input_type p_tmp = p;
505 // line minimization along conjugate direction
506 m_fmin = m_linemin(p, m_conjgrad);
507 if (2*std::abs(fp - m_fmin) <= this->ftol() * (std::abs(m_fmin) + std::abs(fp)) + EPSILON)
512 // Stopping criterion based on maximum motion
513 if (this->min_step() > 0.0)
516 for (std::size_t i = 0; i < n; ++i)
518 if (mx < std::abs(p[i] - p_tmp[i]))
520 mx = std::abs(m_conjgrad[i]);
523 std::cout << "Max motion: " << mx << std::endl;
524 if (mx < this->min_step())
530 this->dfunctor()(p, gradtmp);
533 for (std::size_t i = 0; i < n; ++i)
535 gg += square(m_grad[i]);
536 dgg += (gradtmp[i]+m_grad[i]) * gradtmp[i];
537 dgg2 += square(gradtmp[i]);
544 //std::cout << "dgg: " << dgg << " , dgg2: " << dgg2 << std::endl;
546 for (std::size_t i = 0; i < n; ++i)
548 m_grad[i] = -gradtmp[i];
549 m_conjgrad[i] = m_grad[i] + gam * m_conjgrad[i];
552 std::cout << "max iter reached" << std::endl;
557 //---------------------------------------------------------------------------
559 template < typename TFunctor, typename TGradFunctor, typename TLineMin >
560 GradientDescent<TFunctor, TGradFunctor, TLineMin>::GradientDescent
563 , TGradFunctor gradfunctor
566 : Basis(functor, gradfunctor)
572 //---------------------------------------------------------------------------
574 template < typename TFunctor, typename TGradFunctor, typename TLineMin >
575 typename GradientDescent<TFunctor, TGradFunctor, TLineMin>::input_type
576 GradientDescent<TFunctor, TGradFunctor, TLineMin>::operator()(input_type p)
578 const output_prec EPSILON = 128 * std::numeric_limits<output_prec>::epsilon();
580 // temporary variable to hold gradient
585 fp = this->functor()(p);
587 for (this->nIter() = 0; this->nIter() < this->maxIter(); ++this->nIter())
589 std::cout << "GD iter " << this->nIter() << std::endl; // << " : " << p << std::endl;
591 this->dfunctor()(p, m_grad);
592 // negate gradient to point toward smaller values
593 std::for_each(m_grad.begin(), m_grad.end(), std::negate<input_prec>());
594 // line minimization along gradient direction
595 m_fmin = m_linemin(p, m_grad);
598 if (2*std::abs(fp - m_fmin) <= this->ftol() * (std::abs(m_fmin) + std::abs(fp)) + EPSILON)
608 //---------------------------------------------------------------------------
612 #endif /*MINTOOLS_H_*/