aimstil  5.0.5
minTools.tpp
Go to the documentation of this file.
1 #ifndef TIL_MINTOOLS_TPP
2 #define TIL_MINTOOLS_TPP
3 
4 namespace til
5 {
6 
7 
8  //---------------------------------------------------------------------------
9 
10  /// A class to find a bracketing triplet around a function minimum.
11  template < typename TFunctor >
12  void Mnbrak<TFunctor>::operator()(input_prec & ax, input_prec & bx, input_prec & cx, input_type point, input_type dir)
13  {
14  if (m_verbose) std::cout << "[ BEGIN Bracketing ]" << std::endl;
15 
16  using namespace expr;
17  // check that input dimension are equal
18  assert(size(point) == size(dir));
19  // resize buffer accordingly
20  resize(m_buf, size(point));
21 
22  input_prec q, r, u, ulim;
23  output_prec fu;
24 
25  detail::loop_xxx(*_1 = *_2 + ax * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
26  if (m_verbose) std::cout << "lambda = " << ax << std::endl;
27  m_fa = m_functor(m_buf);
28 
29  //m_fa = m_functor(point + ax * dir);
30  detail::loop_xxx(*_1 = *_2 + bx * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
31  if (m_verbose) std::cout << "lambda = " << bx << std::endl;
32  m_fb = m_functor(m_buf);
33  //m_fb = m_functor(point + bx * dir);
34 
35  // swap A and B so that we go downhill from A to B
36  if (m_fb > m_fa)
37  {
38  std::swap(ax, bx);
39  std::swap(m_fa, m_fb);
40  }
41  // First guess for c
42  cx = bx + GOLD * (bx - ax);
43 
44  detail::loop_xxx(*_1 = *_2 + cx * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
45  if (m_verbose) std::cout << "lambda = " << cx << std::endl;
46  m_fc = m_functor(m_buf);
47  //m_fc = m_functor(point + cx * dir);
48  // Keep estimating until we have a bracketing triplet
49  while (m_fb > m_fc)
50  {
51  r = ( bx - ax ) * ( m_fb - m_fc );
52  q = ( bx - cx ) * ( m_fb - m_fa );
53  // parabolic fit estimate
54  u = bx - ((bx-cx)*q - (bx-ax)*r) / (2.0 * sign(max(std::abs(q-r), EPSILON), q-r));
55  ulim = bx + GLIMIT * ( cx - bx );
56  // Check whether u is between b and c
57  if (same_sign(bx-u, u-cx))
58  {
59  detail::loop_xxx(*_1 = *_2 + u * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
60  if (m_verbose) std::cout << "lambda = " << u << std::endl;
61  fu = m_functor(m_buf);
62  //fu = m_functor(point + u * dir);
63  // If the value is smaller than fc, then [b,u,c] is a bracket
64  if (fu < m_fc)
65  {
66  // rename [b u] as [a b], so that we return bracket [a b c]
67  shift_right(u, bx, ax);
68  shift_right(fu, m_fb, m_fa);
69  if (m_verbose) std::cout << ax << " " << bx << " " << cx << std::endl;
70  if (m_verbose) std::cout << "[ END Bracketing ]" << std::endl;
71  return;
72  }
73  // If the value is bigger than fb, then [a,b,u] is a bracket
74  // NB: This probably happens very seldom, since u is the minimum parabolic fit.
75  else if (fu > m_fb)
76  {
77  // rename u as b so that we return bracket [a b c ]
78  cx = u;
79  m_fc = fu;
80  if (m_verbose) std::cout << ax << " " << bx << " " << cx << std::endl;
81  if (m_verbose) std::cout << "[ END Bracketing ]" << std::endl;
82  return;
83  }
84 
85  // NB: in all other cases, it will be assumed that [b c u] will be our next triplet,
86  // be it a bracket or not, therefore we don't have special treatments and returns.
87 
88  // If parabolic fit did not bracket, do a golden ratio [b - c -- *]
89  u = cx + GOLD * (cx - bx);
90  detail::loop_xxx(*_1 = *_2 + u * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
91  if (m_verbose) std::cout << "lambda = " << u << std::endl;
92  fu = m_functor(m_buf);
93  //fu = m_functor(point + u * dir);
94  }
95  // Check that u, the parabolic trial, is outside c but still before ulim
96  else if (same_sign(cx-u, u-ulim))
97  {
98  detail::loop_xxx(*_1 = *_2 + u * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
99  if (m_verbose) std::cout << "lambda = " << u << std::endl;
100  fu = m_functor(m_buf);
101  //fu = m_functor(point + u * dir);
102  // if fu < fc, do a golden ration [c - u -- *], and get rid of b.
103  if (fu < m_fc)
104  {
105  shift_right(u + GOLD * (u-cx), u, cx, bx);
106  detail::loop_xxx(*_1 = *_2 + u * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
107  if (m_verbose) std::cout << "lambda = " << u << std::endl;
108  shift_right(m_functor(m_buf), fu, m_fc, m_fb);
109  //shift_right(m_functor(point + u * dir), fu, m_fc, m_fb);
110  }
111  // (else, we have a bracket)
112  }
113  // Check that u is beyond ulim and cx
114  else if (same_sign(u - ulim, ulim - cx))
115  {
116  u = ulim;
117  detail::loop_xxx(*_1 = *_2 + u * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
118  if (m_verbose) std::cout << "lambda = " << u << std::endl;
119  fu = m_functor(m_buf);
120  //fu = m_functor(point + u * dir);
121  }
122  // If we are here, it means that u is before b, which does not make sense:
123  // do a golden search.
124  else
125  {
126  u = cx + GOLD*(cx-bx);
127  detail::loop_xxx(*_1 = *_2 + u * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
128  if (m_verbose) std::cout << "lambda = " << u << std::endl;
129  fu = m_functor(m_buf);
130  //fu = m_functor(point + u * dir);
131  }
132 
133  // rename [b c u] into [a b c]
134 
135  shift_right(u, cx, bx, ax);
136  shift_right(fu, m_fc, m_fb, m_fa);
137  }
138  if (m_verbose) std::cout << ax << " " << bx << " " << cx << std::endl;
139  if (m_verbose) std::cout << "[ END Bracketing ]" << std::endl;
140  }
141 
142  //---------------------------------------------------------------------------
143 
144  template < typename TFunctor >
145  const typename Mnbrak<TFunctor>::input_prec Mnbrak<TFunctor>::GOLD = (std::sqrt(5.0) + 1.0) / 2.0;
146  template < typename TFunctor >
147  const typename Mnbrak<TFunctor>::input_prec Mnbrak<TFunctor>::EPSILON = 128 * std::numeric_limits<input_prec>::epsilon();
148  template < typename TFunctor >
149  const typename Mnbrak<TFunctor>::input_prec Mnbrak<TFunctor>::GLIMIT = 100;
150 
151  //---------------------------------------------------------------------------
152 
153  template < typename TFunctor >
154  Brent<TFunctor>::Brent()
155  : m_functor()
156  , m_xmin()
157  , m_fmin()
158  , m_tol()
159  , m_itmax()
160  {
161  this->init();
162  }
163 
164  //---------------------------------------------------------------------------
165 
166  template < typename TFunctor >
167  Brent<TFunctor>::Brent(TFunctor functor)
168  : m_functor(functor)
169  , m_xmin()
170  , m_fmin()
171  , m_tol()
172  , m_itmax()
173  {
174  this->init();
175  }
176 
177  //---------------------------------------------------------------------------
178 
179  /// A class to minimize a functional within a bracketing triplet.
180  template < typename TFunctor >
181  void Brent<TFunctor>::operator()
182  (
183  input_prec a
184  , input_prec m
185  , input_prec b
186  , input_type point
187  , input_type dir
188  )
189  {
190  if (m_verbose) std::cout << "[ BEGIN Parabolic linemin ]" << std::endl;
191  using namespace expr;
192  typedef typename combine<input_prec, output_prec>::type max_prec;
193  resize(m_buf, size(point));
194  // The last and before last estimates of the minimum
195  input_prec w, v;
196  // Their corresponding function values
197  output_prec fw, fv, fu;
198  // The center of [a, b]
199  input_prec x_med;
200  // Distance moved on the last step last
201  input_prec stepLength = 0.0;
202  // Distance moved on the step before last
203  input_prec stepLength2 = 0.0;
204  input_prec u;
205  max_prec r, p, q, tmp;
206  max_prec tol1, tol2;
207  // Swap bracket if necessary
208  if (a > b) std::swap(a,b);
209  // Initialize all estimated minima at the bracket point m
210  m_xmin = w = v = m;
211  detail::loop_xxx(*_1 = *_2 + m_xmin * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
212  if (m_verbose) std::cout << "lambda = " << m_xmin << std::endl;
213  fw = fv = m_fmin = m_functor(m_buf);
214  //fw = fv = m_fmin = m_functor(point + m_xmin * dir);
215 
216  // main minimization loop
217  for (int iter = 0; iter < m_itmax; ++iter)
218  {
219  x_med = 0.5 * ( a + b );
220  //tol1 = m_tol * std::abs(m_xmin) + ZEPS;
221  tol1 = m_tol;
222  tol2 = 2 * tol1;
223  // Convergence test
224  //if (std::abs(m_xmin-x_med) <= tol2)
225  if (std::abs(m_xmin-x_med) <= (tol2-input_prec(0.5)*(b-a)))
226  {
227  if (m_verbose) std::cout << "[ END Parabolic linemin ]" << std::endl;
228  return;
229  }
230 
231  // Parabolic fit
232  if (std::abs(stepLength2) > tol1)
233  {
234  r = (m_xmin-w) * (m_fmin-fv);
235  q = (m_xmin-v) * (m_fmin-fw);
236  p = (m_xmin-v) * q - (m_xmin-w)*r;
237  q = max_prec(2.0) * (q-r);
238  if (q > 0) p = -p;
239  q = std::abs(q);
240  tmp = stepLength2;
241  stepLength2 = stepLength;
242  // Is parabolic fit acceptable?
243  if (std::abs(p) >= std::abs(max_prec(0.5)*q*tmp) || p <= q * (a-m_xmin) || p >= q * (b-m_xmin))
244  {
245  // If not, take golden section step instead
246  //stepLength2 = ( m_xmin >= x_med ? a - m_xmin : b - m_xmin );
247  //stepLength = CGOLD * stepLength2;
248  stepLength = CGOLD * ( m_xmin >= x_med ? a - m_xmin : b - m_xmin );
249  }
250  else
251  {
252  // Accept parabolic fit
253  stepLength = p / q;
254  /*
255  u = m_xmin + stepLength;
256  if (u-a < tol2 || b-u < tol2)
257  stepLength = SIGN(tol1, x_med-m_xmin);
258  */
259  }
260  }
261  else
262  {
263  //stepLength2 = ( m_xmin >= x_med ? a - m_xmin : b - m_xmin );
264  //stepLength = CGOLD * stepLength2;
265  stepLength = CGOLD * ( m_xmin >= x_med ? a - m_xmin : b - m_xmin );
266  }
267 
268  u = (std::abs(stepLength) >= tol1 ? m_xmin+stepLength : m_xmin+sign(tol1,stepLength));
269  // This is the one function evaluation per iteration
270  detail::loop_xxx(*_1 = *_2 + u * *_3, m_buf.begin(), m_buf.end(), point.begin(), dir.begin());
271  if (m_verbose) std::cout << "lambda = " << u << std::endl;
272  fu = m_functor(m_buf);
273  //fu = m_functor(point + u * dir);
274  // Now decide what to do with out function evaluation
275  if (fu <= m_fmin)
276  {
277  if (u >= m_xmin) a = m_xmin; else b = m_xmin;
278  shift_right(u, m_xmin, w, v);
279  shift_right(fu, m_fmin, fw, fv);
280  }
281  else
282  {
283  if (u < m_xmin) a = u; else b = u;
284  if (fu <= fw || w == m_xmin)
285  {
286  shift_right(u,w,v);
287  shift_right(fu,fw,fv);
288  }
289  else if (fu <= fv || v == m_xmin || v == w)
290  {
291  v = u;
292  fv = fu;
293  }
294  }
295  }
296  std::cerr << "Warning: Brent reached maximum iteration limit of " << m_itmax << std::endl;
297  if (m_verbose) std::cout << "[ END Parabolic linemin ]" << std::endl;
298  //return m_xmin;
299  }
300 
301  //---------------------------------------------------------------------------
302 
303  template < typename TFunctor >
304  const typename Brent<TFunctor>::input_prec Brent<TFunctor>::CGOLD = (3.0 - std::sqrt(5.0)) / 2.0;
305 
306  //---------------------------------------------------------------------------
307 
308  template < typename TFunctor >
309  typename LineMin<TFunctor>::output_prec
310  LineMin<TFunctor>::operator()
311  (
312  input_type & p,
313  const input_type & dir
314  )
315  {
316  using namespace expr;
317  input_prec a, m, b;
318  a = input_prec(0);
319  m = input_prec(1);
320  m_mnbrak(a, m, b, p, dir);
321  m_brent(a, m, b, p, dir);
322  if (m_verbose) std::cout << "linemin: " << m_brent.xmin() << " " << m_brent.fmin() << std::endl;
323  detail::loop_xx(*_1 += *_2 * m_brent.xmin(), p.begin(), p.end(), dir.begin());
324  //p += dir * m_brent.xmin();
325  return m_brent.fmin();
326  }
327 
328 
329  //---------------------------------------------------------------------------
330 
331  template < typename TFunctor, typename TLineMin >
332  typename Powell<TFunctor, TLineMin>::input_type Powell<TFunctor, TLineMin>::operator()(input_type p)
333  {
334  using namespace expr;
335 
336  const output_prec EPSILON = 128 * std::numeric_limits<output_prec>::epsilon();
337 
338  // function value at point p, before line min
339  output_prec fp;
340  output_prec delta, fpext, tmp;
341 
342  input_type pt, pext, xt, dir;
343  resize(pt, size(p));
344  resize(pext, size(p));
345  resize(xt, size(p));
346  resize(dir, size(p));
347 
348  resize(m_pmin, size(p));
349 
350  typename std::vector<input_type>::iterator iDirBig;
351 
352  //int ibig;
353  std::copy(p.begin(), p.end(), m_pmin.begin());
354  //m_pmin = p;
355  m_fmin = this->functor()(m_pmin);
356 
357  int n = size(m_pmin);
358 
359  this->initDirections(n);
360  //resize(pext, n);
361 
362  //pt = m_pmin;
363 
364  for (this->nIter() = 0; this->nIter() < this->maxIter(); ++this->nIter())
365  {
366  //std::cout << "Iteration " << m_niter << " start from " << m_fmin << " : " << m_pmin << std::endl;
367  std::cout << "Iteration " << this->nIter() << " start from " << m_fmin << std::endl;
368 
369  // save information from previous iteration
370  fp = m_fmin;
371  pt = m_pmin;
372 
373  // minimize over all directions
374  this->minOverAllDirections(iDirBig, delta);
375 
376  // Stopping criterion:
377  // If we didn't minimize that much, return
378  // if (fp - m_fmin < ftol) return m_pmin;
379  if (2.0*(fp-m_fmin) <= this->ftol() * (std::abs(fp) + std::abs(m_fmin)) + EPSILON)
380  {
381  return m_pmin;
382  }
383 
384  // Get information at an extrapolated point, twice as far as m_pmin from previous position.
385  // This will help us to decide whether we want to update the set of directions or not
386  detail::loop_xxx(*_1 = input_prec(2) * *_2 - *_3, pext.begin(), pext.end(), m_pmin.begin(), pt.begin());
387  //pext = input_prec(2) * m_pmin - pt;
388  fpext = this->functor()(pext);
389 
390  // Update directions only if minimization along the best direction is not "played out".
391  if (fpext < fp)
392  {
393  // another magic criterion, that detect either if the biggest direction indeed contributed
394  // much more than the others, or that we are near the bottom of the parabole in this
395  // direction
396  tmp = output_prec(2) * ( fp - output_prec(2) * m_fmin + fpext ) * square( fp - m_fmin - delta ) - delta * square( fp - fpext );
397  if (tmp < 0)
398  {
399  detail::loop_xxx( *_1 = *_2 - *_3, dir.begin(), dir.end(), m_pmin.begin(), pt.begin());
400  //dir = m_pmin - pt;
401  m_fmin = m_linemin(m_pmin, dir);
402  /*
403  m_dirs[ibig] = m_dirs[n];
404  m_dirs[n] = dir;
405  */
406  *iDirBig = dir;
407  //m_dirs[ibig] = dir;
408  }
409  }
410  } while(1);
411 
412  std::cerr << "Warning: Powell reached maximum iteration limit of " << this->maxIter() << std::endl;
413  return m_pmin;
414  }
415 
416  //---------------------------------------------------------------------------
417 
418  template < typename TFunctor, typename TLineMin >
419  void Powell<TFunctor, TLineMin>::initDirections(std::size_t n)
420  {
421  m_dirs.resize(n);
422  if (m_initStd.size())
423  {
424  for (std::size_t i = 0; i != n; ++i)
425  {
426  resize(m_dirs[i], n);
427  std::fill(m_dirs[i].begin(), m_dirs[i].end(), input_prec(0));
428  m_dirs[i][i] = m_initStd[i];
429  }
430  }
431  else
432  {
433  for (std::size_t i = 0; i != n; ++i)
434  {
435  resize(m_dirs[i], n);
436  std::fill(m_dirs[i].begin(), m_dirs[i].end(), input_prec(0));
437  m_dirs[i][i] = input_prec(1);
438  }
439  }
440  }
441 
442  //---------------------------------------------------------------------------
443 
444  template < typename TFunctor, typename TLineMin >
445  void Powell<TFunctor, TLineMin>::minOverAllDirections(typename std::vector<input_type>::iterator & iDirBig, output_prec & delta)
446  {
447  delta = output_prec(-1);
448  output_prec fpext;
449  int count = 0;
450  for (typename std::vector<input_type>::iterator iDir = m_dirs.begin(); iDir != m_dirs.end() ; ++iDir)
451  {
452  fpext = m_fmin;
453  m_fmin = m_linemin(m_pmin, *iDir);
454  //std::cout << "dirmin " << count++ << " " << m_fmin << " : " << m_pmin << std::endl;
455  std::cout << "dirmin " << count++ << " " << m_fmin << std::endl;
456  // Keep this direction if it was the biggest decrease
457  if (fpext - m_fmin > delta)
458  {
459  delta = fpext - m_fmin;
460  //ibig = i
461  iDirBig = iDir;
462  }
463  }
464  }
465 
466  //---------------------------------------------------------------------------
467 
468  // TODO: rewrite all these iterative minimization as steps only, let the user loop himself, that
469  // allows for incredible flexibility, for printing, debugging, whatever.
470  template < typename TFunctor, typename TGradFunctor, typename TLineMin >
471  typename PRConjugateGradient<TFunctor, TGradFunctor, TLineMin>::input_type
472  PRConjugateGradient<TFunctor, TGradFunctor, TLineMin>::operator()(input_type p)
473  {
474  const output_prec EPSILON = 128 * std::numeric_limits<output_prec>::epsilon();
475 
476  std::size_t n = p.size();
477 
478  // We use the function version of resize, and not the member function, because input_type might
479  // not have this member function.
480  resize(m_grad, n);
481  resize(m_conjgrad, n);
482 
483  // temporary variable to hold gradient
484  // TODO: right now, gradtmp is not allocated, this task is left to the gradient
485  // algo. I think it sucks.
486  input_type gradtmp;
487  output_prec gg, dgg, gam;
488  output_prec fp;
489  output_prec dgg2;
490 
491  // initialization
492  fp = this->functor()(p);
493  this->dfunctor()(p, m_grad);
494  std::for_each(m_grad.begin(), m_grad.end(), std::negate<input_prec>());
495  std::copy(m_grad.begin(), m_grad.end(), m_conjgrad.begin());
496 
497  for (this->nIter() = 0; this->nIter() < this->maxIter(); ++this->nIter())
498  {
499  std::cout << "CG iter " << this->nIter();
500  //<< " : ";
501  //std::copy(p.begin(), p.end(), std::ostream_iterator<double>(std::cout, " "));
502  std::cout << std::endl;
503 
504  input_type p_tmp = p;
505  // line minimization along conjugate direction
506  m_fmin = m_linemin(p, m_conjgrad);
507  if (2*std::abs(fp - m_fmin) <= this->ftol() * (std::abs(m_fmin) + std::abs(fp)) + EPSILON)
508  {
509  //return m_pmin;
510  return p;
511  }
512  // Stopping criterion based on maximum motion
513  if (this->min_step() > 0.0)
514  {
515  input_prec mx = 0;
516  for (std::size_t i = 0; i < n; ++i)
517  {
518  if (mx < std::abs(p[i] - p_tmp[i]))
519  {
520  mx = std::abs(m_conjgrad[i]);
521  }
522  }
523  std::cout << "Max motion: " << mx << std::endl;
524  if (mx < this->min_step())
525  {
526  return p;
527  }
528  }
529  fp = m_fmin;
530  this->dfunctor()(p, gradtmp);
531  gg = dgg = 0;
532  dgg2 = 0;
533  for (std::size_t i = 0; i < n; ++i)
534  {
535  gg += square(m_grad[i]);
536  dgg += (gradtmp[i]+m_grad[i]) * gradtmp[i];
537  dgg2 += square(gradtmp[i]);
538  }
539  if (gg < EPSILON)
540  {
541  //return m_pmin;
542  return p;
543  }
544  //std::cout << "dgg: " << dgg << " , dgg2: " << dgg2 << std::endl;
545  gam = dgg / gg;
546  for (std::size_t i = 0; i < n; ++i)
547  {
548  m_grad[i] = -gradtmp[i];
549  m_conjgrad[i] = m_grad[i] + gam * m_conjgrad[i];
550  }
551  }
552  std::cout << "max iter reached" << std::endl;
553  return p;
554  }
555 
556 
557  //---------------------------------------------------------------------------
558 
559  template < typename TFunctor, typename TGradFunctor, typename TLineMin >
560  GradientDescent<TFunctor, TGradFunctor, TLineMin>::GradientDescent
561  (
562  TFunctor functor
563  , TGradFunctor gradfunctor
564  , TLineMin linemin
565  )
566  : Basis(functor, gradfunctor)
567  , m_linemin(linemin)
568  {
569  this->init();
570  }
571 
572  //---------------------------------------------------------------------------
573 
574  template < typename TFunctor, typename TGradFunctor, typename TLineMin >
575  typename GradientDescent<TFunctor, TGradFunctor, TLineMin>::input_type
576  GradientDescent<TFunctor, TGradFunctor, TLineMin>::operator()(input_type p)
577  {
578  const output_prec EPSILON = 128 * std::numeric_limits<output_prec>::epsilon();
579 
580  // temporary variable to hold gradient
581  input_type gradtmp;
582  output_prec fp;
583 
584  // initialization
585  fp = this->functor()(p);
586 
587  for (this->nIter() = 0; this->nIter() < this->maxIter(); ++this->nIter())
588  {
589  std::cout << "GD iter " << this->nIter() << std::endl; // << " : " << p << std::endl;
590  // compute gradient
591  this->dfunctor()(p, m_grad);
592  // negate gradient to point toward smaller values
593  std::for_each(m_grad.begin(), m_grad.end(), std::negate<input_prec>());
594  // line minimization along gradient direction
595  m_fmin = m_linemin(p, m_grad);
596 
597  //if(0)
598  if (2*std::abs(fp - m_fmin) <= this->ftol() * (std::abs(m_fmin) + std::abs(fp)) + EPSILON)
599  {
600  //return m_pmin;
601  return p;
602  }
603  fp = m_fmin;
604  }
605  return p;
606  }
607 
608  //---------------------------------------------------------------------------
609 
610 } // namespace til
611 
612 #endif /*MINTOOLS_H_*/