Joe Verbout
/
main
opencv on mbed
Embed:
(wiki syntax)
Show/hide line numbers
kdtree_index.h
00001 /*********************************************************************** 00002 * Software License Agreement (BSD License) 00003 * 00004 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved. 00005 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved. 00006 * 00007 * THE BSD LICENSE 00008 * 00009 * Redistribution and use in source and binary forms, with or without 00010 * modification, are permitted provided that the following conditions 00011 * are met: 00012 * 00013 * 1. Redistributions of source code must retain the above copyright 00014 * notice, this list of conditions and the following disclaimer. 00015 * 2. Redistributions in binary form must reproduce the above copyright 00016 * notice, this list of conditions and the following disclaimer in the 00017 * documentation and/or other materials provided with the distribution. 00018 * 00019 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 00020 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00021 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 00022 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 00023 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 00024 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00025 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00026 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 00028 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00029 *************************************************************************/ 00030 00031 #ifndef OPENCV_FLANN_KDTREE_INDEX_H_ 00032 #define OPENCV_FLANN_KDTREE_INDEX_H_ 00033 00034 #include <algorithm> 00035 #include <map> 00036 #include <cassert> 00037 #include <cstring> 00038 00039 #include "general.h" 00040 #include "nn_index.h" 00041 #include "dynamic_bitset.h" 00042 #include "matrix.h" 00043 #include "result_set.h" 00044 #include "heap.h" 00045 #include "allocator.h" 00046 #include "random.h" 00047 #include "saving.h" 00048 00049 00050 namespace cvflann 00051 { 00052 00053 struct KDTreeIndexParams : public IndexParams 00054 { 00055 KDTreeIndexParams(int trees = 4) 00056 { 00057 (*this)["algorithm"] = FLANN_INDEX_KDTREE; 00058 (*this)["trees"] = trees; 00059 } 00060 }; 00061 00062 00063 /** 00064 * Randomized kd-tree index 00065 * 00066 * Contains the k-d trees and other information for indexing a set of points 00067 * for nearest-neighbor matching. 00068 */ 00069 template <typename Distance> 00070 class KDTreeIndex : public NNIndex<Distance> 00071 { 00072 public: 00073 typedef typename Distance::ElementType ElementType; 00074 typedef typename Distance::ResultType DistanceType; 00075 00076 00077 /** 00078 * KDTree constructor 00079 * 00080 * Params: 00081 * inputData = dataset with the input features 00082 * params = parameters passed to the kdtree algorithm 00083 */ 00084 KDTreeIndex(const Matrix<ElementType> & inputData, const IndexParams& params = KDTreeIndexParams(), 00085 Distance d = Distance() ) : 00086 dataset_(inputData), index_params_(params), distance_(d) 00087 { 00088 size_ = dataset_.rows; 00089 veclen_ = dataset_.cols; 00090 00091 trees_ = get_param(index_params_,"trees",4); 00092 tree_roots_ = new NodePtr[trees_]; 00093 00094 // Create a permutable array of indices to the input vectors. 00095 vind_.resize(size_); 00096 for (size_t i = 0; i < size_; ++i) { 00097 vind_[i] = int(i); 00098 } 00099 00100 mean_ = new DistanceType[veclen_]; 00101 var_ = new DistanceType[veclen_]; 00102 } 00103 00104 00105 KDTreeIndex(const KDTreeIndex&); 00106 KDTreeIndex& operator=(const KDTreeIndex&); 00107 00108 /** 00109 * Standard destructor 00110 */ 00111 ~KDTreeIndex() 00112 { 00113 if (tree_roots_!=NULL) { 00114 delete[] tree_roots_; 00115 } 00116 delete[] mean_; 00117 delete[] var_; 00118 } 00119 00120 /** 00121 * Builds the index 00122 */ 00123 void buildIndex() 00124 { 00125 /* Construct the randomized trees. */ 00126 for (int i = 0; i < trees_; i++) { 00127 /* Randomize the order of vectors to allow for unbiased sampling. */ 00128 std::random_shuffle(vind_.begin(), vind_.end()); 00129 tree_roots_[i] = divideTree(&vind_[0], int(size_) ); 00130 } 00131 } 00132 00133 00134 flann_algorithm_t getType () const 00135 { 00136 return FLANN_INDEX_KDTREE; 00137 } 00138 00139 00140 void saveIndex(FILE* stream) 00141 { 00142 save_value(stream, trees_); 00143 for (int i=0; i<trees_; ++i) { 00144 save_tree(stream, tree_roots_[i]); 00145 } 00146 } 00147 00148 00149 00150 void loadIndex(FILE* stream) 00151 { 00152 load_value(stream, trees_); 00153 if (tree_roots_!=NULL) { 00154 delete[] tree_roots_; 00155 } 00156 tree_roots_ = new NodePtr[trees_]; 00157 for (int i=0; i<trees_; ++i) { 00158 load_tree(stream,tree_roots_[i]); 00159 } 00160 00161 index_params_["algorithm"] = getType (); 00162 index_params_["trees"] = tree_roots_; 00163 } 00164 00165 /** 00166 * Returns size of index. 00167 */ 00168 size_t size() const 00169 { 00170 return size_; 00171 } 00172 00173 /** 00174 * Returns the length of an index feature. 00175 */ 00176 size_t veclen() const 00177 { 00178 return veclen_; 00179 } 00180 00181 /** 00182 * Computes the inde memory usage 00183 * Returns: memory used by the index 00184 */ 00185 int usedMemory() const 00186 { 00187 return int(pool_.usedMemory+pool_.wastedMemory+dataset_.rows*sizeof(int)); // pool memory and vind array memory 00188 } 00189 00190 /** 00191 * Find set of nearest neighbors to vec. Their indices are stored inside 00192 * the result object. 00193 * 00194 * Params: 00195 * result = the result object in which the indices of the nearest-neighbors are stored 00196 * vec = the vector for which to search the nearest neighbors 00197 * maxCheck = the maximum number of restarts (in a best-bin-first manner) 00198 */ 00199 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) 00200 { 00201 int maxChecks = get_param(searchParams,"checks", 32); 00202 float epsError = 1+get_param(searchParams,"eps",0.0f); 00203 00204 if (maxChecks==FLANN_CHECKS_UNLIMITED) { 00205 getExactNeighbors(result, vec, epsError); 00206 } 00207 else { 00208 getNeighbors(result, vec, maxChecks, epsError); 00209 } 00210 } 00211 00212 IndexParams getParameters () const 00213 { 00214 return index_params_; 00215 } 00216 00217 private: 00218 00219 00220 /*--------------------- Internal Data Structures --------------------------*/ 00221 struct Node 00222 { 00223 /** 00224 * Dimension used for subdivision. 00225 */ 00226 int divfeat; 00227 /** 00228 * The values used for subdivision. 00229 */ 00230 DistanceType divval; 00231 /** 00232 * The child nodes. 00233 */ 00234 Node* child1, * child2; 00235 }; 00236 typedef Node* NodePtr; 00237 typedef BranchStruct<NodePtr, DistanceType> BranchSt; 00238 typedef BranchSt* Branch; 00239 00240 00241 00242 void save_tree(FILE* stream, NodePtr tree) 00243 { 00244 save_value(stream, *tree); 00245 if (tree->child1!=NULL) { 00246 save_tree(stream, tree->child1); 00247 } 00248 if (tree->child2!=NULL) { 00249 save_tree(stream, tree->child2); 00250 } 00251 } 00252 00253 00254 void load_tree(FILE* stream, NodePtr& tree) 00255 { 00256 tree = pool_.allocate<Node>(); 00257 load_value(stream, *tree); 00258 if (tree->child1!=NULL) { 00259 load_tree(stream, tree->child1); 00260 } 00261 if (tree->child2!=NULL) { 00262 load_tree(stream, tree->child2); 00263 } 00264 } 00265 00266 00267 /** 00268 * Create a tree node that subdivides the list of vecs from vind[first] 00269 * to vind[last]. The routine is called recursively on each sublist. 00270 * Place a pointer to this new tree node in the location pTree. 00271 * 00272 * Params: pTree = the new node to create 00273 * first = index of the first vector 00274 * last = index of the last vector 00275 */ 00276 NodePtr divideTree(int* ind, int count) 00277 { 00278 NodePtr node = pool_.allocate<Node>(); // allocate memory 00279 00280 /* If too few exemplars remain, then make this a leaf node. */ 00281 if ( count == 1) { 00282 node->child1 = node->child2 = NULL; /* Mark as leaf node. */ 00283 node->divfeat = *ind; /* Store index of this vec. */ 00284 } 00285 else { 00286 int idx; 00287 int cutfeat; 00288 DistanceType cutval; 00289 meanSplit(ind, count, idx, cutfeat, cutval); 00290 00291 node->divfeat = cutfeat; 00292 node->divval = cutval; 00293 node->child1 = divideTree(ind, idx); 00294 node->child2 = divideTree(ind+idx, count-idx); 00295 } 00296 00297 return node; 00298 } 00299 00300 00301 /** 00302 * Choose which feature to use in order to subdivide this set of vectors. 00303 * Make a random choice among those with the highest variance, and use 00304 * its variance as the threshold value. 00305 */ 00306 void meanSplit(int* ind, int count, int& index, int& cutfeat, DistanceType& cutval) 00307 { 00308 memset(mean_,0,veclen_*sizeof(DistanceType)); 00309 memset(var_,0,veclen_*sizeof(DistanceType)); 00310 00311 /* Compute mean values. Only the first SAMPLE_MEAN values need to be 00312 sampled to get a good estimate. 00313 */ 00314 int cnt = std::min((int)SAMPLE_MEAN+1, count); 00315 for (int j = 0; j < cnt; ++j) { 00316 ElementType* v = dataset_[ind[j]]; 00317 for (size_t k=0; k<veclen_; ++k) { 00318 mean_[k] += v[k]; 00319 } 00320 } 00321 for (size_t k=0; k<veclen_; ++k) { 00322 mean_[k] /= cnt; 00323 } 00324 00325 /* Compute variances (no need to divide by count). */ 00326 for (int j = 0; j < cnt; ++j) { 00327 ElementType* v = dataset_[ind[j]]; 00328 for (size_t k=0; k<veclen_; ++k) { 00329 DistanceType dist = v[k] - mean_[k]; 00330 var_[k] += dist * dist; 00331 } 00332 } 00333 /* Select one of the highest variance indices at random. */ 00334 cutfeat = selectDivision(var_); 00335 cutval = mean_[cutfeat]; 00336 00337 int lim1, lim2; 00338 planeSplit(ind, count, cutfeat, cutval, lim1, lim2); 00339 00340 if (lim1>count/2) index = lim1; 00341 else if (lim2<count/2) index = lim2; 00342 else index = count/2; 00343 00344 /* If either list is empty, it means that all remaining features 00345 * are identical. Split in the middle to maintain a balanced tree. 00346 */ 00347 if ((lim1==count)||(lim2==0)) index = count/2; 00348 } 00349 00350 00351 /** 00352 * Select the top RAND_DIM largest values from v and return the index of 00353 * one of these selected at random. 00354 */ 00355 int selectDivision(DistanceType* v) 00356 { 00357 int num = 0; 00358 size_t topind[RAND_DIM]; 00359 00360 /* Create a list of the indices of the top RAND_DIM values. */ 00361 for (size_t i = 0; i < veclen_; ++i) { 00362 if ((num < RAND_DIM)||(v[i] > v[topind[num-1]])) { 00363 /* Put this element at end of topind. */ 00364 if (num < RAND_DIM) { 00365 topind[num++] = i; /* Add to list. */ 00366 } 00367 else { 00368 topind[num-1] = i; /* Replace last element. */ 00369 } 00370 /* Bubble end value down to right location by repeated swapping. */ 00371 int j = num - 1; 00372 while (j > 0 && v[topind[j]] > v[topind[j-1]]) { 00373 std::swap(topind[j], topind[j-1]); 00374 --j; 00375 } 00376 } 00377 } 00378 /* Select a random integer in range [0,num-1], and return that index. */ 00379 int rnd = rand_int(num); 00380 return (int)topind[rnd]; 00381 } 00382 00383 00384 /** 00385 * Subdivide the list of points by a plane perpendicular on axe corresponding 00386 * to the 'cutfeat' dimension at 'cutval' position. 00387 * 00388 * On return: 00389 * dataset[ind[0..lim1-1]][cutfeat]<cutval 00390 * dataset[ind[lim1..lim2-1]][cutfeat]==cutval 00391 * dataset[ind[lim2..count]][cutfeat]>cutval 00392 */ 00393 void planeSplit(int* ind, int count, int cutfeat, DistanceType cutval, int& lim1, int& lim2) 00394 { 00395 /* Move vector indices for left subtree to front of list. */ 00396 int left = 0; 00397 int right = count-1; 00398 for (;; ) { 00399 while (left<=right && dataset_[ind[left]][cutfeat]<cutval) ++left; 00400 while (left<=right && dataset_[ind[right]][cutfeat]>=cutval) --right; 00401 if (left>right) break; 00402 std::swap(ind[left], ind[right]); ++left; --right; 00403 } 00404 lim1 = left; 00405 right = count-1; 00406 for (;; ) { 00407 while (left<=right && dataset_[ind[left]][cutfeat]<=cutval) ++left; 00408 while (left<=right && dataset_[ind[right]][cutfeat]>cutval) --right; 00409 if (left>right) break; 00410 std::swap(ind[left], ind[right]); ++left; --right; 00411 } 00412 lim2 = left; 00413 } 00414 00415 /** 00416 * Performs an exact nearest neighbor search. The exact search performs a full 00417 * traversal of the tree. 00418 */ 00419 void getExactNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, float epsError) 00420 { 00421 // checkID -= 1; /* Set a different unique ID for each search. */ 00422 00423 if (trees_ > 1) { 00424 fprintf(stderr,"It doesn't make any sense to use more than one tree for exact search"); 00425 } 00426 if (trees_>0) { 00427 searchLevelExact(result, vec, tree_roots_[0], 0.0, epsError); 00428 } 00429 assert(result.full()); 00430 } 00431 00432 /** 00433 * Performs the approximate nearest-neighbor search. The search is approximate 00434 * because the tree traversal is abandoned after a given number of descends in 00435 * the tree. 00436 */ 00437 void getNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, int maxCheck, float epsError) 00438 { 00439 int i; 00440 BranchSt branch; 00441 00442 int checkCount = 0; 00443 Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_); 00444 DynamicBitset checked(size_); 00445 00446 /* Search once through each tree down to root. */ 00447 for (i = 0; i < trees_; ++i) { 00448 searchLevel(result, vec, tree_roots_[i], 0, checkCount, maxCheck, epsError, heap, checked); 00449 } 00450 00451 /* Keep searching other branches from heap until finished. */ 00452 while ( heap->popMin(branch) && (checkCount < maxCheck || !result.full() )) { 00453 searchLevel(result, vec, branch.node, branch.mindist, checkCount, maxCheck, epsError, heap, checked); 00454 } 00455 00456 delete heap; 00457 00458 assert(result.full()); 00459 } 00460 00461 00462 /** 00463 * Search starting from a given node of the tree. Based on any mismatches at 00464 * higher levels, all exemplars below this level must have a distance of 00465 * at least "mindistsq". 00466 */ 00467 void searchLevel(ResultSet<DistanceType>& result_set, const ElementType* vec, NodePtr node, DistanceType mindist, int& checkCount, int maxCheck, 00468 float epsError, Heap<BranchSt>* heap, DynamicBitset& checked) 00469 { 00470 if (result_set.worstDist()<mindist) { 00471 // printf("Ignoring branch, too far\n"); 00472 return; 00473 } 00474 00475 /* If this is a leaf node, then do check and return. */ 00476 if ((node->child1 == NULL)&&(node->child2 == NULL)) { 00477 /* Do not check same node more than once when searching multiple trees. 00478 Once a vector is checked, we set its location in vind to the 00479 current checkID. 00480 */ 00481 int index = node->divfeat; 00482 if ( checked.test(index) || ((checkCount>=maxCheck)&& result_set.full()) ) return; 00483 checked.set(index); 00484 checkCount++; 00485 00486 DistanceType dist = distance_(dataset_[index], vec, veclen_); 00487 result_set.addPoint(dist,index); 00488 00489 return; 00490 } 00491 00492 /* Which child branch should be taken first? */ 00493 ElementType val = vec[node->divfeat]; 00494 DistanceType diff = val - node->divval; 00495 NodePtr bestChild = (diff < 0) ? node->child1 : node->child2; 00496 NodePtr otherChild = (diff < 0) ? node->child2 : node->child1; 00497 00498 /* Create a branch record for the branch not taken. Add distance 00499 of this feature boundary (we don't attempt to correct for any 00500 use of this feature in a parent node, which is unlikely to 00501 happen and would have only a small effect). Don't bother 00502 adding more branches to heap after halfway point, as cost of 00503 adding exceeds their value. 00504 */ 00505 00506 DistanceType new_distsq = mindist + distance_.accum_dist(val, node->divval, node->divfeat); 00507 // if (2 * checkCount < maxCheck || !result.full()) { 00508 if ((new_distsq*epsError < result_set.worstDist())|| !result_set.full()) { 00509 heap->insert( BranchSt(otherChild, new_distsq) ); 00510 } 00511 00512 /* Call recursively to search next level down. */ 00513 searchLevel(result_set, vec, bestChild, mindist, checkCount, maxCheck, epsError, heap, checked); 00514 } 00515 00516 /** 00517 * Performs an exact search in the tree starting from a node. 00518 */ 00519 void searchLevelExact(ResultSet<DistanceType>& result_set, const ElementType* vec, const NodePtr node, DistanceType mindist, const float epsError) 00520 { 00521 /* If this is a leaf node, then do check and return. */ 00522 if ((node->child1 == NULL)&&(node->child2 == NULL)) { 00523 int index = node->divfeat; 00524 DistanceType dist = distance_(dataset_[index], vec, veclen_); 00525 result_set.addPoint(dist,index); 00526 return; 00527 } 00528 00529 /* Which child branch should be taken first? */ 00530 ElementType val = vec[node->divfeat]; 00531 DistanceType diff = val - node->divval; 00532 NodePtr bestChild = (diff < 0) ? node->child1 : node->child2; 00533 NodePtr otherChild = (diff < 0) ? node->child2 : node->child1; 00534 00535 /* Create a branch record for the branch not taken. Add distance 00536 of this feature boundary (we don't attempt to correct for any 00537 use of this feature in a parent node, which is unlikely to 00538 happen and would have only a small effect). Don't bother 00539 adding more branches to heap after halfway point, as cost of 00540 adding exceeds their value. 00541 */ 00542 00543 DistanceType new_distsq = mindist + distance_.accum_dist(val, node->divval, node->divfeat); 00544 00545 /* Call recursively to search next level down. */ 00546 searchLevelExact(result_set, vec, bestChild, mindist, epsError); 00547 00548 if (new_distsq*epsError<=result_set.worstDist()) { 00549 searchLevelExact(result_set, vec, otherChild, new_distsq, epsError); 00550 } 00551 } 00552 00553 00554 private: 00555 00556 enum 00557 { 00558 /** 00559 * To improve efficiency, only SAMPLE_MEAN random values are used to 00560 * compute the mean and variance at each level when building a tree. 00561 * A value of 100 seems to perform as well as using all values. 00562 */ 00563 SAMPLE_MEAN = 100, 00564 /** 00565 * Top random dimensions to consider 00566 * 00567 * When creating random trees, the dimension on which to subdivide is 00568 * selected at random from among the top RAND_DIM dimensions with the 00569 * highest variance. A value of 5 works well. 00570 */ 00571 RAND_DIM=5 00572 }; 00573 00574 00575 /** 00576 * Number of randomized trees that are used 00577 */ 00578 int trees_; 00579 00580 /** 00581 * Array of indices to vectors in the dataset. 00582 */ 00583 std::vector<int> vind_; 00584 00585 /** 00586 * The dataset used by this index 00587 */ 00588 const Matrix<ElementType> dataset_; 00589 00590 IndexParams index_params_; 00591 00592 size_t size_; 00593 size_t veclen_; 00594 00595 00596 DistanceType* mean_; 00597 DistanceType* var_; 00598 00599 00600 /** 00601 * Array of k-d trees used to find neighbours. 00602 */ 00603 NodePtr* tree_roots_; 00604 00605 /** 00606 * Pooled memory allocator. 00607 * 00608 * Using a pooled memory allocator is more efficient 00609 * than allocating memory directly when there is a large 00610 * number small of memory allocations. 00611 */ 00612 PooledAllocator pool_; 00613 00614 Distance distance_; 00615 00616 00617 }; // class KDTreeForest 00618 00619 } 00620 00621 #endif //OPENCV_FLANN_KDTREE_INDEX_H_ 00622
Generated on Tue Jul 12 2022 16:42:38 by 1.7.2