A.I.M.S algorithms


knn.h
Go to the documentation of this file.
1 /* This software and supporting documentation are distributed by
2  * Institut Federatif de Recherche 49
3  * CEA/NeuroSpin, Batiment 145,
4  * 91191 Gif-sur-Yvette cedex
5  * France
6  *
7  * This software is governed by the CeCILL-B license under
8  * French law and abiding by the rules of distribution of free software.
9  * You can use, modify and/or redistribute the software under the
10  * terms of the CeCILL-B license as circulated by CEA, CNRS
11  * and INRIA at the following URL "http://www.cecill.info".
12  *
13  * As a counterpart to the access to the source code and rights to copy,
14  * modify and redistribute granted by the license, users are provided only
15  * with a limited warranty and the software's author, the holder of the
16  * economic rights, and the successive licensors have only limited
17  * liability.
18  *
19  * In this respect, the user's attention is drawn to the risks associated
20  * with loading, using, modifying and/or developing or reproducing the
21  * software by the user in light of its specific status of free software,
22  * that may mean that it is complicated to manipulate, and that also
23  * therefore means that it is reserved for developers and experienced
24  * professionals having in-depth computer knowledge. Users are therefore
25  * encouraged to load and test the software's suitability as regards their
26  * requirements in conditions enabling the security of their systems and/or
27  * data to be ensured and, more generally, to use and operate it in the
28  * same conditions as regards security.
29  *
30  * The fact that you are presently reading this means that you have had
31  * knowledge of the CeCILL-B license and that you accept its terms.
32  */
33 
34 #ifndef AIMS_MATH_KNN_H
35 #define AIMS_MATH_KNN_H
36 
37 #include <cstdlib>
38 #include <stdlib.h>
39 #include <math.h>
40 #include <vector>
41 #include <algorithm>
42 #include <iostream>
43 #include <string.h>
44 
45 namespace aims
46 {
47 
48 namespace knn
49 {
50 
51 class Distance
52 {
53 public:
54  Distance() {};
55  virtual ~Distance() {};
56 
57  virtual inline const std::string &name(void) const
58  {
59  return _name;
60  }
61 
62  virtual double operator()(const double *v1,
63  const double *v2, unsigned int dim) const = 0;
64 
65  virtual double operator()(const std::vector<double> &v1,
66  const std::vector<double> &v2) const
67  {
68  return (*this)(&v1[0], &v2[0], v1.size());
69  }
70 protected:
71  std::string _name;
72 };
73 
75 {
76 public:
78  {
79  _name = "euclidian";
80  }
81  virtual ~EuclidianDistance() {};
82 
83  virtual double operator()(const double *v1,
84  const double *v2, unsigned int dim) const
85  {
86  double d, s = 0.;
87 
88  for (unsigned int i = 0; i < dim; ++i)
89  {
90  d = v1[i] - v2[i];
91  s += d * d;
92  }
93  return sqrt(s);
94  }
95 };
96 
98 {
99 public:
101  {
102  _name = "squared_euclidian";
103  }
105 
106  virtual double operator()(const double *v1,
107  const double *v2, unsigned int dim) const
108  {
109  double d, s = 0.;
110 
111  for (unsigned int i = 0; i < dim; ++i)
112  {
113  d = v1[i] - v2[i];
114  s += d * d;
115  }
116  return s;
117  }
118 };
119 
121 {
122 public:
124  {
125  _name = "manhattan";
126  }
127  virtual ~ManhattanDistance() {};
128 
129  virtual double operator()(const double *v1,
130  const double *v2, unsigned int dim) const
131  {
132  double s = 0.;
133 
134  for (unsigned int i = 0; i < dim; ++i)
135  s += fabs(v1[i] - v2[i]);
136  return s;
137  }
138 };
139 
140 
142 {
143 public:
145  {
146  _name = "tchebychev";
147  }
148  virtual ~TchebychevDistance() {};
149 
150  virtual double operator()(const double *v1,
151  const double *v2, unsigned int dim) const
152  {
153  double d = 0., max = 0.;
154 
155  for (unsigned int i = 0; i < dim; ++i)
156  {
157  d = fabs(v1[i] - v2[i]);
158  if (d > max) max = d;
159  }
160  return max;
161  }
162 };
163 
164 
165 class Database
166 {
167 public:
168  typedef double * value_type;
169  typedef double ** pointer;
170  typedef double *& reference;
171 
172 
173  Database();
174  Database(double *data, unsigned int size, unsigned int dim);
175  virtual ~Database() {};
176 
177  void init(double *data, unsigned int size, unsigned int dim);
178  int search(const std::vector<double> &v,
179  unsigned int dim) const;
180  int search_with_hole(const std::vector<double> &v,
181  unsigned int dim) const;
182 
183 
184  class Vector
185  {
186  public:
187  Vector(Database *db, unsigned int ind)
188  {
189  init(db, ind);
190  }
191  Vector(const Vector &v)
192  {
193  _dim = v._dim;
194  _data = (double *) malloc(sizeof(double) * _dim);
195  memcpy(_data, v._data, sizeof(double) * _dim);
196  _own_data = true;
197  }
198 
199  double *operator*(void)
200  {
201  return _data;
202  };
203 
204  const double *operator*(void) const
205  {
206  return _data;
207  };
208 
209  virtual ~Vector()
210  {
211  this->free();
212  }
213 
214  inline void init(Database *db, unsigned int ind)
215  {
216  if (db != NULL)
217  {
218  _data = (*db)[ind];
219  _dim = db->dim();
220  }
221  else
222  {
223  _data = NULL;
224  _dim = 0;
225  }
226  _own_data = false;
227  }
228 
229  inline void free()
230  {
231  if (_own_data) ::free(_data);
232  }
233 
234  inline double operator()(unsigned int dim) const
235  {
236  return _data[dim];
237  }
238 
239  inline void update(const Vector &v)
240  {
241  _data = v._data;
242  _dim = v._dim;
243  _own_data = false;
244  }
245 
246  inline void update(Database *db, unsigned int ind)
247  {
248  this->free();
249  init(db, ind);
250  }
251 
252  inline Vector &operator =(const Vector &v)
253  {
254  _dim = v._dim;
255  if (_data == NULL)
256  {
257  _data = (double *)malloc(sizeof(double)* _dim);
258  _own_data = true;
259  }
260  memcpy(_data, v._data, sizeof(double) * _dim);
261  return *this;
262  }
263  private:
264  bool _own_data;
265  double *_data;
266  unsigned int _dim;
267  };
268 
270  {
271  public:
272  DatabaseCompare(unsigned int dim) : _dim(dim) {};
273 
274  bool operator()(const Vector &v1, const Vector &v2) const
275  {
276  return v1(_dim) < v2(_dim);
277  }
278 
279  private:
280  unsigned int _dim;
281  };
282 
283  class iterator
284  {
285  public:
286  typedef std::ptrdiff_t difference_type;
287  typedef std::bidirectional_iterator_tag iterator_category;
289  typedef Vector* pointer;
290  typedef Vector& reference;
291 
292 
293 
294  iterator() : _db(NULL), _ind(0), _vector(Vector(NULL, 0)) {}
295  iterator(Database *db, unsigned int ind=0) :
296  _db(db), _ind(ind), _vector(Vector(db, _ind)){}
297  iterator(const iterator &it) : _db(it._db), _ind(it._ind),
298  _vector(it._vector) {};
299  ~iterator() {};
300 
301  inline iterator &operator =(const iterator &it)
302  {
303  _ind = it._ind;
304  _db = it._db;
305  return *this;
306  }
307 
308  bool operator!=(const iterator & it)
309  {
310  return _ind != it._ind;
311  };
312 
313  bool operator==(const iterator & it)
314  {
315  return _ind == it._ind;
316  };
317 
319  {
320  if (_ind < _db->size()) ++_ind;
321  return *this;
322  };
323 
325  {
326  const iterator tmp = *this;
327  if (_ind < _db->size()) _ind++;
328  return tmp;
329  };
330 
332  {
333  --_ind;
334  return *this;
335  };
336 
338  {
339  const iterator tmp = *this;
340  _ind--;
341  return tmp;
342  };
343 
344  int operator-(const iterator &it)
345  {
346  return _ind - it._ind;
347  };
348 
349  int operator+(const iterator &it)
350  {
351  return _ind + it._ind;
352  };
353 
354  iterator operator-(unsigned int ind)
355  {
356  return iterator(_db, _ind - ind);
357  };
358 
359  iterator operator+(unsigned int ind)
360  {
361  return iterator(_db, _ind + ind);
362  };
363 
365  {
366  _vector.update(_db, _ind);
367  return _vector;
368  };
369 
370  bool operator<(const iterator &it)
371  {
372  return _ind < it._ind;
373  };
374 
375  private:
376  Database *_db;
377  unsigned int _ind;
378  Vector _vector;
379  };
380 
382  {
383  return iterator(this, 0);
384  }
385 
386  iterator end(void)
387  {
388  return iterator(this, _size);
389  }
390 
391  inline unsigned int size(void) const
392  {
393  return _size;
394  }
395 
396  inline unsigned int dim(void) const
397  {
398  return _dim;
399  }
400 
401  inline double operator()(unsigned int x, unsigned int y) const
402  {
403  return _data[x * _dim + y];
404  }
405 
406  inline double *operator[](unsigned int ind)
407  {
408  return &_data[ind * _dim];
409  }
410 
411  inline const double *operator[](unsigned int ind) const
412  {
413  return &_data[ind * _dim];
414  }
415 
416  double variance_along_dim(unsigned int dim) const;
417 
418  void sort(unsigned int dim);
419 
420  inline const std::vector<bool> &holes() const
421  {
422  return _holes;
423  }
424 
425  inline void setHole(unsigned int ind, bool status)
426  {
427  _holes[ind] = status;
428  }
429 
430  inline void removeHoles(void)
431  {
432  _holes = std::vector<bool>(_size, false);
433  }
434 
435 protected:
436  double *_data;
437  unsigned int _size;
438  unsigned int _dim;
439  std::vector<bool> _holes;
440 };
441 
442 
443 class MultiDatabase : public Database
444 {
445 public:
446  MultiDatabase(double *data, unsigned int n, unsigned int dim) :
447  Database(data, n, dim) {};
448 };
449 
450 class Heap
451 {
452 public:
453  class MiniHeap
454  {
455  public:
456  MiniHeap(double key, const void *value) :
457  _key(key), _value(value) {};
458  virtual ~MiniHeap() {};
459 
460  MiniHeap(const MiniHeap &h) : _key(h._key), _value(h._value) {}
461 
462  inline MiniHeap &operator =(const MiniHeap &h)
463  {
464  _key = h._key;
465  _value = h._value;
466  return *this;
467  }
468 
469  inline double key() const
470  {
471  return _key;
472  }
473 
474  inline const void *value() const
475  {
476  return _value;
477  }
478 
479  inline bool operator < (const MiniHeap &h) const
480  {
481  return _key < h._key;
482  }
483 
484  inline bool operator > (const MiniHeap &h) const
485  {
486  return _key > h._key;
487  }
488 
489  private:
490  double _key;
491  const void *_value;
492  };
493 
494  Heap(unsigned int n) : _n(0), _limit(n),
495  _list(std::vector<MiniHeap>(n + 1,
496  MiniHeap(HUGE_VAL, NULL))) {};
497  virtual ~Heap() {};
498 
499  void exchange(unsigned int a, unsigned int b);
500  void erase_worst(void);
501  void insert(double key, const void *value);
502  std::pair<std::vector<unsigned int>, std::vector<double> >
503  toVectors(const Database &_db);
504 
505  const MiniHeap &operator[](unsigned int ind) const
506  {
507  return _list[ind];
508  }
509 
510  inline double get_worst_value()
511  {
512  return _list[0].key();
513  }
514 
515 private:
516  unsigned int _n;
517  unsigned int _limit;
518  std::vector<MiniHeap> _list;
519 };
520 
521 class Knn
522 {
523 public:
524  Knn(Database &db, unsigned int k,
525  Distance *distance = new SquaredEuclidianDistance()) :
526  _db(db), _k(k), _distance(distance), _distance_n(-1) {};
527  virtual ~Knn() {};
528 
529  virtual std::pair<std::vector<unsigned int>, std::vector<double> >
530  find(const std::vector<double> &v) = 0;
531 
532 protected:
536  unsigned int _k;
541 };
542 
543 class KnnBruteForce : public Knn
544 {
545 public:
546  KnnBruteForce(Database &db, unsigned int k,
547  Distance *distance = new SquaredEuclidianDistance()) :
548  Knn(db, k, distance) {};
549  virtual ~KnnBruteForce() {};
550 
551  virtual std::pair<std::vector<unsigned int>, std::vector<double> >
552  find(const std::vector<double> &v);
553 };
554 
555 class KnnFriedman : public Knn
556 {
557 public:
558  KnnFriedman(Database &db, unsigned int k,
559  Distance *distance = new SquaredEuclidianDistance()) :
560  Knn(db, k, distance) {};
561  virtual ~KnnFriedman() {};
562 
563  virtual void precompute(void) = 0;
564  virtual std::pair<std::vector<unsigned int>, std::vector<double> >
565  find(const std::vector<double> &v) = 0;
566 };
567 
569 {
570 public:
571  KnnGlobalFriedman(Database &db, unsigned int k,
572  Distance *distance = new SquaredEuclidianDistance()) :
573  KnnFriedman(db, k, distance) {};
574  virtual ~KnnGlobalFriedman() {};
575 
576  virtual void precompute(void);
577  std::pair<std::vector<unsigned int>, std::vector<double> >
578  find(const std::vector<double> &v);
579 
580 private:
581  unsigned int _best_dim;
582 };
583 
584 }; /* end of namespace knn */
585 }; /* end of namespace aims */
586 
587 #endif
int search_with_hole(const std::vector< double > &v, unsigned int dim) const
double *& reference
Definition: knn.h:170
virtual std::pair< std::vector< unsigned int >, std::vector< double > > find(const std::vector< double > &v)=0
double * operator*(void)
Definition: knn.h:199
virtual double operator()(const std::vector< double > &v1, const std::vector< double > &v2) const
Definition: knn.h:65
unsigned int size(void) const
Definition: knn.h:391
virtual ~Knn()
Definition: knn.h:527
iterator end(void)
Definition: knn.h:386
double * value_type
Definition: knn.h:168
float max(float x, float y)
Definition: thickness.h:97
double key() const
Definition: knn.h:469
MiniHeap(const MiniHeap &h)
Definition: knn.h:460
int search(const std::vector< double > &v, unsigned int dim) const
bool operator<(const iterator &it)
Definition: knn.h:370
virtual ~TchebychevDistance()
Definition: knn.h:148
iterator & operator=(const iterator &it)
Definition: knn.h:301
iterator operator++(void)
Definition: knn.h:318
virtual std::pair< std::vector< unsigned int >, std::vector< double > > find(const std::vector< double > &v)
void erase_worst(void)
bool operator()(const Vector &v1, const Vector &v2) const
Definition: knn.h:274
const std::vector< bool > & holes() const
Definition: knn.h:420
double * _data
Definition: knn.h:436
MultiDatabase(double *data, unsigned int n, unsigned int dim)
Definition: knn.h:446
int _distance_n
number of computed distance
Definition: knn.h:540
iterator operator-(unsigned int ind)
Definition: knn.h:354
unsigned int _size
Definition: knn.h:437
virtual ~MiniHeap()
Definition: knn.h:458
virtual ~EuclidianDistance()
Definition: knn.h:81
iterator operator--(void)
Definition: knn.h:331
iterator begin(void)
Definition: knn.h:381
iterator operator--(int)
Definition: knn.h:337
void exchange(unsigned int a, unsigned int b)
std::string _name
Definition: knn.h:71
Vector(const Vector &v)
Definition: knn.h:191
std::vector< bool > _holes
Definition: knn.h:439
Heap(unsigned int n)
Definition: knn.h:494
MiniHeap & operator=(const MiniHeap &h)
Definition: knn.h:462
void setHole(unsigned int ind, bool status)
Definition: knn.h:425
void update(Database *db, unsigned int ind)
Definition: knn.h:246
virtual double operator()(const double *v1, const double *v2, unsigned int dim) const
Definition: knn.h:106
virtual ~KnnGlobalFriedman()
Definition: knn.h:574
const void * value() const
Definition: knn.h:474
MiniHeap(double key, const void *value)
Definition: knn.h:456
double operator()(unsigned int dim) const
Definition: knn.h:234
KnnFriedman(Database &db, unsigned int k, Distance *distance=new SquaredEuclidianDistance())
Definition: knn.h:558
virtual ~Heap()
Definition: knn.h:497
bool operator==(const iterator &it)
Definition: knn.h:313
Database & _db
database wrapper of data
Definition: knn.h:534
Vector & operator=(const Vector &v)
Definition: knn.h:252
double * operator[](unsigned int ind)
Definition: knn.h:406
iterator operator++(int)
Definition: knn.h:324
iterator(const iterator &it)
Definition: knn.h:297
Distance * _distance
distance used in nearest neighbours computations
Definition: knn.h:538
Vector & operator*(void)
Definition: knn.h:364
bool operator>(const MiniHeap &h) const
Definition: knn.h:484
KnnBruteForce(Database &db, unsigned int k, Distance *distance=new SquaredEuclidianDistance())
Definition: knn.h:546
virtual double operator()(const double *v1, const double *v2, unsigned int dim) const
Definition: knn.h:150
unsigned int _dim
Definition: knn.h:438
virtual std::pair< std::vector< unsigned int >, std::vector< double > > find(const std::vector< double > &v)=0
virtual ~KnnFriedman()
Definition: knn.h:561
double get_worst_value()
Definition: knn.h:510
void init(double *data, unsigned int size, unsigned int dim)
virtual void precompute(void)
KnnGlobalFriedman(Database &db, unsigned int k, Distance *distance=new SquaredEuclidianDistance())
Definition: knn.h:571
std::ptrdiff_t difference_type
Definition: knn.h:286
const MiniHeap & operator[](unsigned int ind) const
Definition: knn.h:505
int operator+(const iterator &it)
Definition: knn.h:349
std::pair< std::vector< unsigned int >, std::vector< double > > toVectors(const Database &_db)
virtual void precompute(void)=0
void sort(unsigned int dim)
iterator operator+(unsigned int ind)
Definition: knn.h:359
virtual ~KnnBruteForce()
Definition: knn.h:549
virtual double operator()(const double *v1, const double *v2, unsigned int dim) const =0
void removeHoles(void)
Definition: knn.h:430
bool operator!=(const iterator &it)
Definition: knn.h:308
double operator()(unsigned int x, unsigned int y) const
Definition: knn.h:401
void update(const Vector &v)
Definition: knn.h:239
virtual ~ManhattanDistance()
Definition: knn.h:127
DatabaseCompare(unsigned int dim)
Definition: knn.h:272
virtual double operator()(const double *v1, const double *v2, unsigned int dim) const
Definition: knn.h:129
void init(Database *db, unsigned int ind)
Definition: knn.h:214
virtual ~Distance()
Definition: knn.h:55
double ** pointer
Definition: knn.h:169
void insert(double key, const void *value)
virtual double operator()(const double *v1, const double *v2, unsigned int dim) const
Definition: knn.h:83
std::bidirectional_iterator_tag iterator_category
Definition: knn.h:287
virtual const std::string & name(void) const
Definition: knn.h:57
const double * operator[](unsigned int ind) const
Definition: knn.h:411
unsigned int dim(void) const
Definition: knn.h:396
const double * operator*(void) const
Definition: knn.h:204
double variance_along_dim(unsigned int dim) const
bool operator<(const MiniHeap &h) const
Definition: knn.h:479
iterator(Database *db, unsigned int ind=0)
Definition: knn.h:295
std::pair< std::vector< unsigned int >, std::vector< double > > find(const std::vector< double > &v)
int operator-(const iterator &it)
Definition: knn.h:344
Knn(Database &db, unsigned int k, Distance *distance=new SquaredEuclidianDistance())
Definition: knn.h:524
virtual ~Database()
Definition: knn.h:175
unsigned int _k
k : number of nearest neighbours
Definition: knn.h:536
Vector(Database *db, unsigned int ind)
Definition: knn.h:187