Joe Verbout
/
main
opencv on mbed
Embed:
(wiki syntax)
Show/hide line numbers
hierarchical_clustering_index.h
00001 /*********************************************************************** 00002 * Software License Agreement (BSD License) 00003 * 00004 * Copyright 2008-2011 Marius Muja (mariusm@cs.ubc.ca). All rights reserved. 00005 * Copyright 2008-2011 David G. Lowe (lowe@cs.ubc.ca). All rights reserved. 00006 * 00007 * THE BSD LICENSE 00008 * 00009 * Redistribution and use in source and binary forms, with or without 00010 * modification, are permitted provided that the following conditions 00011 * are met: 00012 * 00013 * 1. Redistributions of source code must retain the above copyright 00014 * notice, this list of conditions and the following disclaimer. 00015 * 2. Redistributions in binary form must reproduce the above copyright 00016 * notice, this list of conditions and the following disclaimer in the 00017 * documentation and/or other materials provided with the distribution. 00018 * 00019 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 00020 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00021 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 00022 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 00023 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 00024 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00025 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00026 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 00028 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00029 *************************************************************************/ 00030 00031 #ifndef OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_ 00032 #define OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_ 00033 00034 #include <algorithm> 00035 #include <map> 00036 #include <cassert> 00037 #include <limits> 00038 #include <cmath> 00039 00040 #include "general.h" 00041 #include "nn_index.h" 00042 #include "dist.h" 00043 #include "matrix.h" 00044 #include "result_set.h" 00045 #include "heap.h" 00046 #include "allocator.h" 00047 #include "random.h" 00048 #include "saving.h" 00049 00050 00051 namespace cvflann 00052 { 00053 00054 struct HierarchicalClusteringIndexParams : public IndexParams 00055 { 00056 HierarchicalClusteringIndexParams(int branching = 32, 00057 flann_centers_init_t centers_init = FLANN_CENTERS_RANDOM, 00058 int trees = 4, int leaf_size = 100) 00059 { 00060 (*this)["algorithm"] = FLANN_INDEX_HIERARCHICAL; 00061 // The branching factor used in the hierarchical clustering 00062 (*this)["branching"] = branching; 00063 // Algorithm used for picking the initial cluster centers 00064 (*this)["centers_init"] = centers_init; 00065 // number of parallel trees to build 00066 (*this)["trees"] = trees; 00067 // maximum leaf size 00068 (*this)["leaf_size"] = leaf_size; 00069 } 00070 }; 00071 00072 00073 /** 00074 * Hierarchical index 00075 * 00076 * Contains a tree constructed through a hierarchical clustering 00077 * and other information for indexing a set of points for nearest-neighbour matching. 00078 */ 00079 template <typename Distance> 00080 class HierarchicalClusteringIndex : public NNIndex<Distance> 00081 { 00082 public: 00083 typedef typename Distance::ElementType ElementType; 00084 typedef typename Distance::ResultType DistanceType; 00085 00086 private: 00087 00088 00089 typedef void (HierarchicalClusteringIndex::* centersAlgFunction)(int, int*, int, int*, int&); 00090 00091 /** 00092 * The function used for choosing the cluster centers. 00093 */ 00094 centersAlgFunction chooseCenters; 00095 00096 00097 00098 /** 00099 * Chooses the initial centers in the k-means clustering in a random manner. 00100 * 00101 * Params: 00102 * k = number of centers 00103 * vecs = the dataset of points 00104 * indices = indices in the dataset 00105 * indices_length = length of indices vector 00106 * 00107 */ 00108 void chooseCentersRandom(int k, int* dsindices, int indices_length, int* centers, int& centers_length) 00109 { 00110 UniqueRandom r(indices_length); 00111 00112 int index; 00113 for (index=0; index<k; ++index) { 00114 bool duplicate = true; 00115 int rnd; 00116 while (duplicate) { 00117 duplicate = false; 00118 rnd = r.next(); 00119 if (rnd<0) { 00120 centers_length = index; 00121 return; 00122 } 00123 00124 centers[index] = dsindices[rnd]; 00125 00126 for (int j=0; j<index; ++j) { 00127 DistanceType sq = distance(dataset[centers[index]], dataset[centers[j]], dataset.cols); 00128 if (sq<1e-16) { 00129 duplicate = true; 00130 } 00131 } 00132 } 00133 } 00134 00135 centers_length = index; 00136 } 00137 00138 00139 /** 00140 * Chooses the initial centers in the k-means using Gonzales' algorithm 00141 * so that the centers are spaced apart from each other. 00142 * 00143 * Params: 00144 * k = number of centers 00145 * vecs = the dataset of points 00146 * indices = indices in the dataset 00147 * Returns: 00148 */ 00149 void chooseCentersGonzales(int k, int* dsindices, int indices_length, int* centers, int& centers_length) 00150 { 00151 int n = indices_length; 00152 00153 int rnd = rand_int(n); 00154 assert(rnd >=0 && rnd < n); 00155 00156 centers[0] = dsindices[rnd]; 00157 00158 int index; 00159 for (index=1; index<k; ++index) { 00160 00161 int best_index = -1; 00162 DistanceType best_val = 0; 00163 for (int j=0; j<n; ++j) { 00164 DistanceType dist = distance(dataset[centers[0]],dataset[dsindices[j]],dataset.cols); 00165 for (int i=1; i<index; ++i) { 00166 DistanceType tmp_dist = distance(dataset[centers[i]],dataset[dsindices[j]],dataset.cols); 00167 if (tmp_dist<dist) { 00168 dist = tmp_dist; 00169 } 00170 } 00171 if (dist>best_val) { 00172 best_val = dist; 00173 best_index = j; 00174 } 00175 } 00176 if (best_index!=-1) { 00177 centers[index] = dsindices[best_index]; 00178 } 00179 else { 00180 break; 00181 } 00182 } 00183 centers_length = index; 00184 } 00185 00186 00187 /** 00188 * Chooses the initial centers in the k-means using the algorithm 00189 * proposed in the KMeans++ paper: 00190 * Arthur, David; Vassilvitskii, Sergei - k-means++: The Advantages of Careful Seeding 00191 * 00192 * Implementation of this function was converted from the one provided in Arthur's code. 00193 * 00194 * Params: 00195 * k = number of centers 00196 * vecs = the dataset of points 00197 * indices = indices in the dataset 00198 * Returns: 00199 */ 00200 void chooseCentersKMeanspp(int k, int* dsindices, int indices_length, int* centers, int& centers_length) 00201 { 00202 int n = indices_length; 00203 00204 double currentPot = 0; 00205 DistanceType* closestDistSq = new DistanceType[n]; 00206 00207 // Choose one random center and set the closestDistSq values 00208 int index = rand_int(n); 00209 assert(index >=0 && index < n); 00210 centers[0] = dsindices[index]; 00211 00212 // Computing distance^2 will have the advantage of even higher probability further to pick new centers 00213 // far from previous centers (and this complies to "k-means++: the advantages of careful seeding" article) 00214 for (int i = 0; i < n; i++) { 00215 closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols); 00216 closestDistSq[i] = ensureSquareDistance<Distance>( closestDistSq[i] ); 00217 currentPot += closestDistSq[i]; 00218 } 00219 00220 00221 const int numLocalTries = 1; 00222 00223 // Choose each center 00224 int centerCount; 00225 for (centerCount = 1; centerCount < k; centerCount++) { 00226 00227 // Repeat several trials 00228 double bestNewPot = -1; 00229 int bestNewIndex = 0; 00230 for (int localTrial = 0; localTrial < numLocalTries; localTrial++) { 00231 00232 // Choose our center - have to be slightly careful to return a valid answer even accounting 00233 // for possible rounding errors 00234 double randVal = rand_double(currentPot); 00235 for (index = 0; index < n-1; index++) { 00236 if (randVal <= closestDistSq[index]) break; 00237 else randVal -= closestDistSq[index]; 00238 } 00239 00240 // Compute the new potential 00241 double newPot = 0; 00242 for (int i = 0; i < n; i++) { 00243 DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols); 00244 newPot += std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] ); 00245 } 00246 00247 // Store the best result 00248 if ((bestNewPot < 0)||(newPot < bestNewPot)) { 00249 bestNewPot = newPot; 00250 bestNewIndex = index; 00251 } 00252 } 00253 00254 // Add the appropriate center 00255 centers[centerCount] = dsindices[bestNewIndex]; 00256 currentPot = bestNewPot; 00257 for (int i = 0; i < n; i++) { 00258 DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols); 00259 closestDistSq[i] = std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] ); 00260 } 00261 } 00262 00263 centers_length = centerCount; 00264 00265 delete[] closestDistSq; 00266 } 00267 00268 00269 /** 00270 * Chooses the initial centers in a way inspired by Gonzales (by Pierre-Emmanuel Viel): 00271 * select the first point of the list as a candidate, then parse the points list. If another 00272 * point is further than current candidate from the other centers, test if it is a good center 00273 * of a local aggregation. If it is, replace current candidate by this point. And so on... 00274 * 00275 * Used with KMeansIndex that computes centers coordinates by averaging positions of clusters points, 00276 * this doesn't make a real difference with previous methods. But used with HierarchicalClusteringIndex 00277 * class that pick centers among existing points instead of computing the barycenters, there is a real 00278 * improvement. 00279 * 00280 * Params: 00281 * k = number of centers 00282 * vecs = the dataset of points 00283 * indices = indices in the dataset 00284 * Returns: 00285 */ 00286 void GroupWiseCenterChooser(int k, int* dsindices, int indices_length, int* centers, int& centers_length) 00287 { 00288 const float kSpeedUpFactor = 1.3f; 00289 00290 int n = indices_length; 00291 00292 DistanceType* closestDistSq = new DistanceType[n]; 00293 00294 // Choose one random center and set the closestDistSq values 00295 int index = rand_int(n); 00296 assert(index >=0 && index < n); 00297 centers[0] = dsindices[index]; 00298 00299 for (int i = 0; i < n; i++) { 00300 closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols); 00301 } 00302 00303 00304 // Choose each center 00305 int centerCount; 00306 for (centerCount = 1; centerCount < k; centerCount++) { 00307 00308 // Repeat several trials 00309 double bestNewPot = -1; 00310 int bestNewIndex = 0; 00311 DistanceType furthest = 0; 00312 for (index = 0; index < n; index++) { 00313 00314 // We will test only the potential of the points further than current candidate 00315 if( closestDistSq[index] > kSpeedUpFactor * (float)furthest ) { 00316 00317 // Compute the new potential 00318 double newPot = 0; 00319 for (int i = 0; i < n; i++) { 00320 newPot += std::min( distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols) 00321 , closestDistSq[i] ); 00322 } 00323 00324 // Store the best result 00325 if ((bestNewPot < 0)||(newPot <= bestNewPot)) { 00326 bestNewPot = newPot; 00327 bestNewIndex = index; 00328 furthest = closestDistSq[index]; 00329 } 00330 } 00331 } 00332 00333 // Add the appropriate center 00334 centers[centerCount] = dsindices[bestNewIndex]; 00335 for (int i = 0; i < n; i++) { 00336 closestDistSq[i] = std::min( distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols) 00337 , closestDistSq[i] ); 00338 } 00339 } 00340 00341 centers_length = centerCount; 00342 00343 delete[] closestDistSq; 00344 } 00345 00346 00347 public: 00348 00349 00350 /** 00351 * Index constructor 00352 * 00353 * Params: 00354 * inputData = dataset with the input features 00355 * params = parameters passed to the hierarchical k-means algorithm 00356 */ 00357 HierarchicalClusteringIndex(const Matrix<ElementType> & inputData, const IndexParams& index_params = HierarchicalClusteringIndexParams(), 00358 Distance d = Distance()) 00359 : dataset(inputData), params(index_params), root(NULL), indices(NULL), distance(d) 00360 { 00361 memoryCounter = 0; 00362 00363 size_ = dataset.rows; 00364 veclen_ = dataset.cols; 00365 00366 branching_ = get_param(params,"branching",32); 00367 centers_init_ = get_param(params,"centers_init", FLANN_CENTERS_RANDOM); 00368 trees_ = get_param(params,"trees",4); 00369 leaf_size_ = get_param(params,"leaf_size",100); 00370 00371 if (centers_init_==FLANN_CENTERS_RANDOM) { 00372 chooseCenters = &HierarchicalClusteringIndex::chooseCentersRandom; 00373 } 00374 else if (centers_init_==FLANN_CENTERS_GONZALES) { 00375 chooseCenters = &HierarchicalClusteringIndex::chooseCentersGonzales; 00376 } 00377 else if (centers_init_==FLANN_CENTERS_KMEANSPP) { 00378 chooseCenters = &HierarchicalClusteringIndex::chooseCentersKMeanspp; 00379 } 00380 else if (centers_init_==FLANN_CENTERS_GROUPWISE) { 00381 chooseCenters = &HierarchicalClusteringIndex::GroupWiseCenterChooser; 00382 } 00383 else { 00384 throw FLANNException("Unknown algorithm for choosing initial centers."); 00385 } 00386 00387 trees_ = get_param(params,"trees",4); 00388 root = new NodePtr[trees_]; 00389 indices = new int*[trees_]; 00390 00391 for (int i=0; i<trees_; ++i) { 00392 root[i] = NULL; 00393 indices[i] = NULL; 00394 } 00395 } 00396 00397 HierarchicalClusteringIndex(const HierarchicalClusteringIndex&); 00398 HierarchicalClusteringIndex& operator=(const HierarchicalClusteringIndex&); 00399 00400 /** 00401 * Index destructor. 00402 * 00403 * Release the memory used by the index. 00404 */ 00405 virtual ~HierarchicalClusteringIndex() 00406 { 00407 free_elements(); 00408 00409 if (root!=NULL) { 00410 delete[] root; 00411 } 00412 00413 if (indices!=NULL) { 00414 delete[] indices; 00415 } 00416 } 00417 00418 00419 /** 00420 * Release the inner elements of indices[] 00421 */ 00422 void free_elements() 00423 { 00424 if (indices!=NULL) { 00425 for(int i=0; i<trees_; ++i) { 00426 if (indices[i]!=NULL) { 00427 delete[] indices[i]; 00428 indices[i] = NULL; 00429 } 00430 } 00431 } 00432 } 00433 00434 00435 /** 00436 * Returns size of index. 00437 */ 00438 size_t size() const 00439 { 00440 return size_; 00441 } 00442 00443 /** 00444 * Returns the length of an index feature. 00445 */ 00446 size_t veclen() const 00447 { 00448 return veclen_; 00449 } 00450 00451 00452 /** 00453 * Computes the inde memory usage 00454 * Returns: memory used by the index 00455 */ 00456 int usedMemory() const 00457 { 00458 return pool.usedMemory+pool.wastedMemory+memoryCounter; 00459 } 00460 00461 /** 00462 * Builds the index 00463 */ 00464 void buildIndex() 00465 { 00466 if (branching_<2) { 00467 throw FLANNException("Branching factor must be at least 2"); 00468 } 00469 00470 free_elements(); 00471 00472 for (int i=0; i<trees_; ++i) { 00473 indices[i] = new int[size_]; 00474 for (size_t j=0; j<size_; ++j) { 00475 indices[i][j] = (int)j; 00476 } 00477 root[i] = pool.allocate<Node>(); 00478 computeClustering(root[i], indices[i], (int)size_, branching_,0); 00479 } 00480 } 00481 00482 00483 flann_algorithm_t getType () const 00484 { 00485 return FLANN_INDEX_HIERARCHICAL; 00486 } 00487 00488 00489 void saveIndex(FILE* stream) 00490 { 00491 save_value(stream, branching_); 00492 save_value(stream, trees_); 00493 save_value(stream, centers_init_); 00494 save_value(stream, leaf_size_); 00495 save_value(stream, memoryCounter); 00496 for (int i=0; i<trees_; ++i) { 00497 save_value(stream, *indices[i], size_); 00498 save_tree(stream, root[i], i); 00499 } 00500 00501 } 00502 00503 00504 void loadIndex(FILE* stream) 00505 { 00506 free_elements(); 00507 00508 if (root!=NULL) { 00509 delete[] root; 00510 } 00511 00512 if (indices!=NULL) { 00513 delete[] indices; 00514 } 00515 00516 load_value(stream, branching_); 00517 load_value(stream, trees_); 00518 load_value(stream, centers_init_); 00519 load_value(stream, leaf_size_); 00520 load_value(stream, memoryCounter); 00521 00522 indices = new int*[trees_]; 00523 root = new NodePtr[trees_]; 00524 for (int i=0; i<trees_; ++i) { 00525 indices[i] = new int[size_]; 00526 load_value(stream, *indices[i], size_); 00527 load_tree(stream, root[i], i); 00528 } 00529 00530 params["algorithm"] = getType (); 00531 params["branching"] = branching_; 00532 params["trees"] = trees_; 00533 params["centers_init"] = centers_init_; 00534 params["leaf_size"] = leaf_size_; 00535 } 00536 00537 00538 /** 00539 * Find set of nearest neighbors to vec. Their indices are stored inside 00540 * the result object. 00541 * 00542 * Params: 00543 * result = the result object in which the indices of the nearest-neighbors are stored 00544 * vec = the vector for which to search the nearest neighbors 00545 * searchParams = parameters that influence the search algorithm (checks) 00546 */ 00547 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) 00548 { 00549 00550 int maxChecks = get_param(searchParams,"checks",32); 00551 00552 // Priority queue storing intermediate branches in the best-bin-first search 00553 Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_); 00554 00555 std::vector<bool> checked(size_,false); 00556 int checks = 0; 00557 for (int i=0; i<trees_; ++i) { 00558 findNN(root[i], result, vec, checks, maxChecks, heap, checked); 00559 } 00560 00561 BranchSt branch; 00562 while (heap->popMin(branch) && (checks<maxChecks || !result.full())) { 00563 NodePtr node = branch.node; 00564 findNN(node, result, vec, checks, maxChecks, heap, checked); 00565 } 00566 assert(result.full()); 00567 00568 delete heap; 00569 00570 } 00571 00572 IndexParams getParameters () const 00573 { 00574 return params; 00575 } 00576 00577 00578 private: 00579 00580 /** 00581 * Struture representing a node in the hierarchical k-means tree. 00582 */ 00583 struct Node 00584 { 00585 /** 00586 * The cluster center index 00587 */ 00588 int pivot; 00589 /** 00590 * The cluster size (number of points in the cluster) 00591 */ 00592 int size; 00593 /** 00594 * Child nodes (only for non-terminal nodes) 00595 */ 00596 Node** childs; 00597 /** 00598 * Node points (only for terminal nodes) 00599 */ 00600 int* indices; 00601 /** 00602 * Level 00603 */ 00604 int level; 00605 }; 00606 typedef Node* NodePtr; 00607 00608 00609 00610 /** 00611 * Alias definition for a nicer syntax. 00612 */ 00613 typedef BranchStruct<NodePtr, DistanceType> BranchSt; 00614 00615 00616 00617 void save_tree(FILE* stream, NodePtr node, int num) 00618 { 00619 save_value(stream, *node); 00620 if (node->childs==NULL) { 00621 int indices_offset = (int)(node->indices - indices[num]); 00622 save_value(stream, indices_offset); 00623 } 00624 else { 00625 for(int i=0; i<branching_; ++i) { 00626 save_tree(stream, node->childs[i], num); 00627 } 00628 } 00629 } 00630 00631 00632 void load_tree(FILE* stream, NodePtr& node, int num) 00633 { 00634 node = pool.allocate<Node>(); 00635 load_value(stream, *node); 00636 if (node->childs==NULL) { 00637 int indices_offset; 00638 load_value(stream, indices_offset); 00639 node->indices = indices[num] + indices_offset; 00640 } 00641 else { 00642 node->childs = pool.allocate<NodePtr>(branching_); 00643 for(int i=0; i<branching_; ++i) { 00644 load_tree(stream, node->childs[i], num); 00645 } 00646 } 00647 } 00648 00649 00650 00651 00652 void computeLabels(int* dsindices, int indices_length, int* centers, int centers_length, int* labels, DistanceType& cost) 00653 { 00654 cost = 0; 00655 for (int i=0; i<indices_length; ++i) { 00656 ElementType* point = dataset[dsindices[i]]; 00657 DistanceType dist = distance(point, dataset[centers[0]], veclen_); 00658 labels[i] = 0; 00659 for (int j=1; j<centers_length; ++j) { 00660 DistanceType new_dist = distance(point, dataset[centers[j]], veclen_); 00661 if (dist>new_dist) { 00662 labels[i] = j; 00663 dist = new_dist; 00664 } 00665 } 00666 cost += dist; 00667 } 00668 } 00669 00670 /** 00671 * The method responsible with actually doing the recursive hierarchical 00672 * clustering 00673 * 00674 * Params: 00675 * node = the node to cluster 00676 * indices = indices of the points belonging to the current node 00677 * branching = the branching factor to use in the clustering 00678 * 00679 * TODO: for 1-sized clusters don't store a cluster center (it's the same as the single cluster point) 00680 */ 00681 void computeClustering(NodePtr node, int* dsindices, int indices_length, int branching, int level) 00682 { 00683 node->size = indices_length; 00684 node->level = level; 00685 00686 if (indices_length < leaf_size_) { // leaf node 00687 node->indices = dsindices; 00688 std::sort(node->indices,node->indices+indices_length); 00689 node->childs = NULL; 00690 return; 00691 } 00692 00693 std::vector<int> centers(branching); 00694 std::vector<int> labels(indices_length); 00695 00696 int centers_length; 00697 (this->*chooseCenters)(branching, dsindices, indices_length, ¢ers[0], centers_length); 00698 00699 if (centers_length<branching) { 00700 node->indices = dsindices; 00701 std::sort(node->indices,node->indices+indices_length); 00702 node->childs = NULL; 00703 return; 00704 } 00705 00706 00707 // assign points to clusters 00708 DistanceType cost; 00709 computeLabels(dsindices, indices_length, ¢ers[0], centers_length, &labels[0], cost); 00710 00711 node->childs = pool.allocate<NodePtr>(branching); 00712 int start = 0; 00713 int end = start; 00714 for (int i=0; i<branching; ++i) { 00715 for (int j=0; j<indices_length; ++j) { 00716 if (labels[j]==i) { 00717 std::swap(dsindices[j],dsindices[end]); 00718 std::swap(labels[j],labels[end]); 00719 end++; 00720 } 00721 } 00722 00723 node->childs[i] = pool.allocate<Node>(); 00724 node->childs[i]->pivot = centers[i]; 00725 node->childs[i]->indices = NULL; 00726 computeClustering(node->childs[i],dsindices+start, end-start, branching, level+1); 00727 start=end; 00728 } 00729 } 00730 00731 00732 00733 /** 00734 * Performs one descent in the hierarchical k-means tree. The branches not 00735 * visited are stored in a priority queue. 00736 * 00737 * Params: 00738 * node = node to explore 00739 * result = container for the k-nearest neighbors found 00740 * vec = query points 00741 * checks = how many points in the dataset have been checked so far 00742 * maxChecks = maximum dataset points to checks 00743 */ 00744 00745 00746 void findNN(NodePtr node, ResultSet<DistanceType>& result, const ElementType* vec, int& checks, int maxChecks, 00747 Heap<BranchSt>* heap, std::vector<bool>& checked) 00748 { 00749 if (node->childs==NULL) { 00750 if (checks>=maxChecks) { 00751 if (result.full()) return; 00752 } 00753 for (int i=0; i<node->size; ++i) { 00754 int index = node->indices[i]; 00755 if (!checked[index]) { 00756 DistanceType dist = distance(dataset[index], vec, veclen_); 00757 result.addPoint(dist, index); 00758 checked[index] = true; 00759 ++checks; 00760 } 00761 } 00762 } 00763 else { 00764 DistanceType* domain_distances = new DistanceType[branching_]; 00765 int best_index = 0; 00766 domain_distances[best_index] = distance(vec, dataset[node->childs[best_index]->pivot], veclen_); 00767 for (int i=1; i<branching_; ++i) { 00768 domain_distances[i] = distance(vec, dataset[node->childs[i]->pivot], veclen_); 00769 if (domain_distances[i]<domain_distances[best_index]) { 00770 best_index = i; 00771 } 00772 } 00773 for (int i=0; i<branching_; ++i) { 00774 if (i!=best_index) { 00775 heap->insert(BranchSt(node->childs[i],domain_distances[i])); 00776 } 00777 } 00778 delete[] domain_distances; 00779 findNN(node->childs[best_index],result,vec, checks, maxChecks, heap, checked); 00780 } 00781 } 00782 00783 private: 00784 00785 00786 /** 00787 * The dataset used by this index 00788 */ 00789 const Matrix<ElementType> dataset; 00790 00791 /** 00792 * Parameters used by this index 00793 */ 00794 IndexParams params; 00795 00796 00797 /** 00798 * Number of features in the dataset. 00799 */ 00800 size_t size_; 00801 00802 /** 00803 * Length of each feature. 00804 */ 00805 size_t veclen_; 00806 00807 /** 00808 * The root node in the tree. 00809 */ 00810 NodePtr* root; 00811 00812 /** 00813 * Array of indices to vectors in the dataset. 00814 */ 00815 int** indices; 00816 00817 00818 /** 00819 * The distance 00820 */ 00821 Distance distance; 00822 00823 /** 00824 * Pooled memory allocator. 00825 * 00826 * Using a pooled memory allocator is more efficient 00827 * than allocating memory directly when there is a large 00828 * number small of memory allocations. 00829 */ 00830 PooledAllocator pool; 00831 00832 /** 00833 * Memory occupied by the index. 00834 */ 00835 int memoryCounter; 00836 00837 /** index parameters */ 00838 int branching_; 00839 int trees_; 00840 flann_centers_init_t centers_init_; 00841 int leaf_size_; 00842 00843 00844 }; 00845 00846 } 00847 00848 #endif /* OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_ */ 00849
Generated on Tue Jul 12 2022 16:42:38 by 1.7.2