main | Mbed

Users » joeverbout » Code » main » Documentation
opencv on mbed
Embed: (wiki syntax)
Show/hide line numbers hierarchical_clustering_index.h Source File
00001 /***********************************************************************
00002  * Software License Agreement (BSD License)
00003  *
00004  * Copyright 2008-2011  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
00005  * Copyright 2008-2011  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
00006  *
00007  * THE BSD LICENSE
00008  *
00009  * Redistribution and use in source and binary forms, with or without
00010  * modification, are permitted provided that the following conditions
00011  * are met:
00012  *
00013  * 1. Redistributions of source code must retain the above copyright
00014  *    notice, this list of conditions and the following disclaimer.
00015  * 2. Redistributions in binary form must reproduce the above copyright
00016  *    notice, this list of conditions and the following disclaimer in the
00017  *    documentation and/or other materials provided with the distribution.
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00020  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00021  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00022  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00023  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00024  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00025  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00026  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00027  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00028  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029  *************************************************************************/
00030 
00031 #ifndef OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
00032 #define OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
00033 
00034 #include <algorithm>
00035 #include <map>
00036 #include <cassert>
00037 #include <limits>
00038 #include <cmath>
00039 
00040 #include "general.h"
00041 #include "nn_index.h"
00042 #include "dist.h"
00043 #include "matrix.h"
00044 #include "result_set.h"
00045 #include "heap.h"
00046 #include "allocator.h"
00047 #include "random.h"
00048 #include "saving.h"
00049 
00050 
00051 namespace cvflann
00052 {
00053 
00054 struct HierarchicalClusteringIndexParams : public IndexParams
00055 {
00056     HierarchicalClusteringIndexParams(int branching = 32,
00057                                       flann_centers_init_t centers_init = FLANN_CENTERS_RANDOM,
00058                                       int trees = 4, int leaf_size = 100)
00059     {
00060         (*this)["algorithm"] = FLANN_INDEX_HIERARCHICAL;
00061         // The branching factor used in the hierarchical clustering
00062         (*this)["branching"] = branching;
00063         // Algorithm used for picking the initial cluster centers
00064         (*this)["centers_init"] = centers_init;
00065         // number of parallel trees to build
00066         (*this)["trees"] = trees;
00067         // maximum leaf size
00068         (*this)["leaf_size"] = leaf_size;
00069     }
00070 };
00071 
00072 
00073 /**
00074  * Hierarchical index
00075  *
00076  * Contains a tree constructed through a hierarchical clustering
00077  * and other information for indexing a set of points for nearest-neighbour matching.
00078  */
00079 template <typename Distance>
00080 class HierarchicalClusteringIndex : public NNIndex<Distance>
00081 {
00082 public:
00083     typedef typename Distance::ElementType ElementType;
00084     typedef typename Distance::ResultType DistanceType;
00085 
00086 private:
00087 
00088 
00089     typedef void (HierarchicalClusteringIndex::* centersAlgFunction)(int, int*, int, int*, int&);
00090 
00091     /**
00092      * The function used for choosing the cluster centers.
00093      */
00094     centersAlgFunction chooseCenters;
00095 
00096 
00097 
00098     /**
00099      * Chooses the initial centers in the k-means clustering in a random manner.
00100      *
00101      * Params:
00102      *     k = number of centers
00103      *     vecs = the dataset of points
00104      *     indices = indices in the dataset
00105      *     indices_length = length of indices vector
00106      *
00107      */
00108     void chooseCentersRandom(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
00109     {
00110         UniqueRandom r(indices_length);
00111 
00112         int index;
00113         for (index=0; index<k; ++index) {
00114             bool duplicate = true;
00115             int rnd;
00116             while (duplicate) {
00117                 duplicate = false;
00118                 rnd = r.next();
00119                 if (rnd<0) {
00120                     centers_length = index;
00121                     return;
00122                 }
00123 
00124                 centers[index] = dsindices[rnd];
00125 
00126                 for (int j=0; j<index; ++j) {
00127                     DistanceType sq = distance(dataset[centers[index]], dataset[centers[j]], dataset.cols);
00128                     if (sq<1e-16) {
00129                         duplicate = true;
00130                     }
00131                 }
00132             }
00133         }
00134 
00135         centers_length = index;
00136     }
00137 
00138 
00139     /**
00140      * Chooses the initial centers in the k-means using Gonzales' algorithm
00141      * so that the centers are spaced apart from each other.
00142      *
00143      * Params:
00144      *     k = number of centers
00145      *     vecs = the dataset of points
00146      *     indices = indices in the dataset
00147      * Returns:
00148      */
00149     void chooseCentersGonzales(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
00150     {
00151         int n = indices_length;
00152 
00153         int rnd = rand_int(n);
00154         assert(rnd >=0 && rnd < n);
00155 
00156         centers[0] = dsindices[rnd];
00157 
00158         int index;
00159         for (index=1; index<k; ++index) {
00160 
00161             int best_index = -1;
00162             DistanceType best_val = 0;
00163             for (int j=0; j<n; ++j) {
00164                 DistanceType dist = distance(dataset[centers[0]],dataset[dsindices[j]],dataset.cols);
00165                 for (int i=1; i<index; ++i) {
00166                     DistanceType tmp_dist = distance(dataset[centers[i]],dataset[dsindices[j]],dataset.cols);
00167                     if (tmp_dist<dist) {
00168                         dist = tmp_dist;
00169                     }
00170                 }
00171                 if (dist>best_val) {
00172                     best_val = dist;
00173                     best_index = j;
00174                 }
00175             }
00176             if (best_index!=-1) {
00177                 centers[index] = dsindices[best_index];
00178             }
00179             else {
00180                 break;
00181             }
00182         }
00183         centers_length = index;
00184     }
00185 
00186 
00187     /**
00188      * Chooses the initial centers in the k-means using the algorithm
00189      * proposed in the KMeans++ paper:
00190      * Arthur, David; Vassilvitskii, Sergei - k-means++: The Advantages of Careful Seeding
00191      *
00192      * Implementation of this function was converted from the one provided in Arthur's code.
00193      *
00194      * Params:
00195      *     k = number of centers
00196      *     vecs = the dataset of points
00197      *     indices = indices in the dataset
00198      * Returns:
00199      */
00200     void chooseCentersKMeanspp(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
00201     {
00202         int n = indices_length;
00203 
00204         double currentPot = 0;
00205         DistanceType* closestDistSq = new DistanceType[n];
00206 
00207         // Choose one random center and set the closestDistSq values
00208         int index = rand_int(n);
00209         assert(index >=0 && index < n);
00210         centers[0] = dsindices[index];
00211 
00212         // Computing distance^2 will have the advantage of even higher probability further to pick new centers
00213         // far from previous centers (and this complies to "k-means++: the advantages of careful seeding" article)
00214         for (int i = 0; i < n; i++) {
00215             closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
00216             closestDistSq[i] = ensureSquareDistance<Distance>( closestDistSq[i] );
00217             currentPot += closestDistSq[i];
00218         }
00219 
00220 
00221         const int numLocalTries = 1;
00222 
00223         // Choose each center
00224         int centerCount;
00225         for (centerCount = 1; centerCount < k; centerCount++) {
00226 
00227             // Repeat several trials
00228             double bestNewPot = -1;
00229             int bestNewIndex = 0;
00230             for (int localTrial = 0; localTrial < numLocalTries; localTrial++) {
00231 
00232                 // Choose our center - have to be slightly careful to return a valid answer even accounting
00233                 // for possible rounding errors
00234                 double randVal = rand_double(currentPot);
00235                 for (index = 0; index < n-1; index++) {
00236                     if (randVal <= closestDistSq[index]) break;
00237                     else randVal -= closestDistSq[index];
00238                 }
00239 
00240                 // Compute the new potential
00241                 double newPot = 0;
00242                 for (int i = 0; i < n; i++) {
00243                     DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
00244                     newPot += std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
00245                 }
00246 
00247                 // Store the best result
00248                 if ((bestNewPot < 0)||(newPot < bestNewPot)) {
00249                     bestNewPot = newPot;
00250                     bestNewIndex = index;
00251                 }
00252             }
00253 
00254             // Add the appropriate center
00255             centers[centerCount] = dsindices[bestNewIndex];
00256             currentPot = bestNewPot;
00257             for (int i = 0; i < n; i++) {
00258                 DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols);
00259                 closestDistSq[i] = std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
00260             }
00261         }
00262 
00263         centers_length = centerCount;
00264 
00265         delete[] closestDistSq;
00266     }
00267 
00268 
00269     /**
00270      * Chooses the initial centers in a way inspired by Gonzales (by Pierre-Emmanuel Viel):
00271      * select the first point of the list as a candidate, then parse the points list. If another
00272      * point is further than current candidate from the other centers, test if it is a good center
00273      * of a local aggregation. If it is, replace current candidate by this point. And so on...
00274      *
00275      * Used with KMeansIndex that computes centers coordinates by averaging positions of clusters points,
00276      * this doesn't make a real difference with previous methods. But used with HierarchicalClusteringIndex
00277      * class that pick centers among existing points instead of computing the barycenters, there is a real
00278      * improvement.
00279      *
00280      * Params:
00281      *     k = number of centers
00282      *     vecs = the dataset of points
00283      *     indices = indices in the dataset
00284      * Returns:
00285      */
00286     void GroupWiseCenterChooser(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
00287     {
00288         const float kSpeedUpFactor = 1.3f;
00289 
00290         int n = indices_length;
00291 
00292         DistanceType* closestDistSq = new DistanceType[n];
00293 
00294         // Choose one random center and set the closestDistSq values
00295         int index = rand_int(n);
00296         assert(index >=0 && index < n);
00297         centers[0] = dsindices[index];
00298 
00299         for (int i = 0; i < n; i++) {
00300             closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
00301         }
00302 
00303 
00304         // Choose each center
00305         int centerCount;
00306         for (centerCount = 1; centerCount < k; centerCount++) {
00307 
00308             // Repeat several trials
00309             double bestNewPot = -1;
00310             int bestNewIndex = 0;
00311             DistanceType furthest = 0;
00312             for (index = 0; index < n; index++) {
00313 
00314                 // We will test only the potential of the points further than current candidate
00315                 if( closestDistSq[index] > kSpeedUpFactor * (float)furthest ) {
00316 
00317                     // Compute the new potential
00318                     double newPot = 0;
00319                     for (int i = 0; i < n; i++) {
00320                         newPot += std::min( distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols)
00321                                             , closestDistSq[i] );
00322                     }
00323 
00324                     // Store the best result
00325                     if ((bestNewPot < 0)||(newPot <= bestNewPot)) {
00326                         bestNewPot = newPot;
00327                         bestNewIndex = index;
00328                         furthest = closestDistSq[index];
00329                     }
00330                 }
00331             }
00332 
00333             // Add the appropriate center
00334             centers[centerCount] = dsindices[bestNewIndex];
00335             for (int i = 0; i < n; i++) {
00336                 closestDistSq[i] = std::min( distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols)
00337                                              , closestDistSq[i] );
00338             }
00339         }
00340 
00341         centers_length = centerCount;
00342 
00343         delete[] closestDistSq;
00344     }
00345 
00346 
00347 public:
00348 
00349 
00350     /**
00351      * Index constructor
00352      *
00353      * Params:
00354      *          inputData = dataset with the input features
00355      *          params = parameters passed to the hierarchical k-means algorithm
00356      */
00357     HierarchicalClusteringIndex(const Matrix<ElementType> & inputData, const IndexParams& index_params = HierarchicalClusteringIndexParams(),
00358                                 Distance d = Distance())
00359         : dataset(inputData), params(index_params), root(NULL), indices(NULL), distance(d)
00360     {
00361         memoryCounter = 0;
00362 
00363         size_ = dataset.rows;
00364         veclen_ = dataset.cols;
00365 
00366         branching_ = get_param(params,"branching",32);
00367         centers_init_ = get_param(params,"centers_init", FLANN_CENTERS_RANDOM);
00368         trees_ = get_param(params,"trees",4);
00369         leaf_size_ = get_param(params,"leaf_size",100);
00370 
00371         if (centers_init_==FLANN_CENTERS_RANDOM) {
00372             chooseCenters = &HierarchicalClusteringIndex::chooseCentersRandom;
00373         }
00374         else if (centers_init_==FLANN_CENTERS_GONZALES) {
00375             chooseCenters = &HierarchicalClusteringIndex::chooseCentersGonzales;
00376         }
00377         else if (centers_init_==FLANN_CENTERS_KMEANSPP) {
00378             chooseCenters = &HierarchicalClusteringIndex::chooseCentersKMeanspp;
00379         }
00380         else if (centers_init_==FLANN_CENTERS_GROUPWISE) {
00381             chooseCenters = &HierarchicalClusteringIndex::GroupWiseCenterChooser;
00382         }
00383         else {
00384             throw FLANNException("Unknown algorithm for choosing initial centers.");
00385         }
00386 
00387         trees_ = get_param(params,"trees",4);
00388         root = new NodePtr[trees_];
00389         indices = new int*[trees_];
00390 
00391         for (int i=0; i<trees_; ++i) {
00392             root[i] = NULL;
00393             indices[i] = NULL;
00394         }
00395     }
00396 
00397     HierarchicalClusteringIndex(const HierarchicalClusteringIndex&);
00398     HierarchicalClusteringIndex& operator=(const HierarchicalClusteringIndex&);
00399 
00400     /**
00401      * Index destructor.
00402      *
00403      * Release the memory used by the index.
00404      */
00405     virtual ~HierarchicalClusteringIndex()
00406     {
00407         free_elements();
00408 
00409         if (root!=NULL) {
00410             delete[] root;
00411         }
00412 
00413         if (indices!=NULL) {
00414             delete[] indices;
00415         }
00416     }
00417 
00418 
00419     /**
00420      * Release the inner elements of indices[]
00421      */
00422     void free_elements()
00423     {
00424         if (indices!=NULL) {
00425             for(int i=0; i<trees_; ++i) {
00426                 if (indices[i]!=NULL) {
00427                     delete[] indices[i];
00428                     indices[i] = NULL;
00429                 }
00430             }
00431         }
00432     }
00433 
00434 
00435     /**
00436      *  Returns size of index.
00437      */
00438     size_t size() const
00439     {
00440         return size_;
00441     }
00442 
00443     /**
00444      * Returns the length of an index feature.
00445      */
00446     size_t veclen() const
00447     {
00448         return veclen_;
00449     }
00450 
00451 
00452     /**
00453      * Computes the inde memory usage
00454      * Returns: memory used by the index
00455      */
00456     int usedMemory() const
00457     {
00458         return pool.usedMemory+pool.wastedMemory+memoryCounter;
00459     }
00460 
00461     /**
00462      * Builds the index
00463      */
00464     void buildIndex()
00465     {
00466         if (branching_<2) {
00467             throw FLANNException("Branching factor must be at least 2");
00468         }
00469 
00470         free_elements();
00471 
00472         for (int i=0; i<trees_; ++i) {
00473             indices[i] = new int[size_];
00474             for (size_t j=0; j<size_; ++j) {
00475                 indices[i][j] = (int)j;
00476             }
00477             root[i] = pool.allocate<Node>();
00478             computeClustering(root[i], indices[i], (int)size_, branching_,0);
00479         }
00480     }
00481 
00482 
00483     flann_algorithm_t getType () const
00484     {
00485         return FLANN_INDEX_HIERARCHICAL;
00486     }
00487 
00488 
00489     void saveIndex(FILE* stream)
00490     {
00491         save_value(stream, branching_);
00492         save_value(stream, trees_);
00493         save_value(stream, centers_init_);
00494         save_value(stream, leaf_size_);
00495         save_value(stream, memoryCounter);
00496         for (int i=0; i<trees_; ++i) {
00497             save_value(stream, *indices[i], size_);
00498             save_tree(stream, root[i], i);
00499         }
00500 
00501     }
00502 
00503 
00504     void loadIndex(FILE* stream)
00505     {
00506         free_elements();
00507 
00508         if (root!=NULL) {
00509             delete[] root;
00510         }
00511 
00512         if (indices!=NULL) {
00513             delete[] indices;
00514         }
00515 
00516         load_value(stream, branching_);
00517         load_value(stream, trees_);
00518         load_value(stream, centers_init_);
00519         load_value(stream, leaf_size_);
00520         load_value(stream, memoryCounter);
00521 
00522         indices = new int*[trees_];
00523         root = new NodePtr[trees_];
00524         for (int i=0; i<trees_; ++i) {
00525             indices[i] = new int[size_];
00526             load_value(stream, *indices[i], size_);
00527             load_tree(stream, root[i], i);
00528         }
00529 
00530         params["algorithm"] = getType ();
00531         params["branching"] = branching_;
00532         params["trees"] = trees_;
00533         params["centers_init"] = centers_init_;
00534         params["leaf_size"] = leaf_size_;
00535     }
00536 
00537 
00538     /**
00539      * Find set of nearest neighbors to vec. Their indices are stored inside
00540      * the result object.
00541      *
00542      * Params:
00543      *     result = the result object in which the indices of the nearest-neighbors are stored
00544      *     vec = the vector for which to search the nearest neighbors
00545      *     searchParams = parameters that influence the search algorithm (checks)
00546      */
00547     void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
00548     {
00549 
00550         int maxChecks = get_param(searchParams,"checks",32);
00551 
00552         // Priority queue storing intermediate branches in the best-bin-first search
00553         Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_);
00554 
00555         std::vector<bool> checked(size_,false);
00556         int checks = 0;
00557         for (int i=0; i<trees_; ++i) {
00558             findNN(root[i], result, vec, checks, maxChecks, heap, checked);
00559         }
00560 
00561         BranchSt branch;
00562         while (heap->popMin(branch) && (checks<maxChecks || !result.full())) {
00563             NodePtr node = branch.node;
00564             findNN(node, result, vec, checks, maxChecks, heap, checked);
00565         }
00566         assert(result.full());
00567 
00568         delete heap;
00569 
00570     }
00571 
00572     IndexParams getParameters () const
00573     {
00574         return params;
00575     }
00576 
00577 
00578 private:
00579 
00580     /**
00581      * Struture representing a node in the hierarchical k-means tree.
00582      */
00583     struct Node
00584     {
00585         /**
00586          * The cluster center index
00587          */
00588         int pivot;
00589         /**
00590          * The cluster size (number of points in the cluster)
00591          */
00592         int size;
00593         /**
00594          * Child nodes (only for non-terminal nodes)
00595          */
00596         Node** childs;
00597         /**
00598          * Node points (only for terminal nodes)
00599          */
00600         int* indices;
00601         /**
00602          * Level
00603          */
00604         int level;
00605     };
00606     typedef Node* NodePtr;
00607 
00608 
00609 
00610     /**
00611      * Alias definition for a nicer syntax.
00612      */
00613     typedef BranchStruct<NodePtr, DistanceType> BranchSt;
00614 
00615 
00616 
00617     void save_tree(FILE* stream, NodePtr node, int num)
00618     {
00619         save_value(stream, *node);
00620         if (node->childs==NULL) {
00621             int indices_offset = (int)(node->indices - indices[num]);
00622             save_value(stream, indices_offset);
00623         }
00624         else {
00625             for(int i=0; i<branching_; ++i) {
00626                 save_tree(stream, node->childs[i], num);
00627             }
00628         }
00629     }
00630 
00631 
00632     void load_tree(FILE* stream, NodePtr& node, int num)
00633     {
00634         node = pool.allocate<Node>();
00635         load_value(stream, *node);
00636         if (node->childs==NULL) {
00637             int indices_offset;
00638             load_value(stream, indices_offset);
00639             node->indices = indices[num] + indices_offset;
00640         }
00641         else {
00642             node->childs = pool.allocate<NodePtr>(branching_);
00643             for(int i=0; i<branching_; ++i) {
00644                 load_tree(stream, node->childs[i], num);
00645             }
00646         }
00647     }
00648 
00649 
00650 
00651 
00652     void computeLabels(int* dsindices, int indices_length,  int* centers, int centers_length, int* labels, DistanceType& cost)
00653     {
00654         cost = 0;
00655         for (int i=0; i<indices_length; ++i) {
00656             ElementType* point = dataset[dsindices[i]];
00657             DistanceType dist = distance(point, dataset[centers[0]], veclen_);
00658             labels[i] = 0;
00659             for (int j=1; j<centers_length; ++j) {
00660                 DistanceType new_dist = distance(point, dataset[centers[j]], veclen_);
00661                 if (dist>new_dist) {
00662                     labels[i] = j;
00663                     dist = new_dist;
00664                 }
00665             }
00666             cost += dist;
00667         }
00668     }
00669 
00670     /**
00671      * The method responsible with actually doing the recursive hierarchical
00672      * clustering
00673      *
00674      * Params:
00675      *     node = the node to cluster
00676      *     indices = indices of the points belonging to the current node
00677      *     branching = the branching factor to use in the clustering
00678      *
00679      * TODO: for 1-sized clusters don't store a cluster center (it's the same as the single cluster point)
00680      */
00681     void computeClustering(NodePtr node, int* dsindices, int indices_length, int branching, int level)
00682     {
00683         node->size = indices_length;
00684         node->level = level;
00685 
00686         if (indices_length < leaf_size_) { // leaf node
00687             node->indices = dsindices;
00688             std::sort(node->indices,node->indices+indices_length);
00689             node->childs = NULL;
00690             return;
00691         }
00692 
00693         std::vector<int> centers(branching);
00694         std::vector<int> labels(indices_length);
00695 
00696         int centers_length;
00697         (this->*chooseCenters)(branching, dsindices, indices_length, &centers[0], centers_length);
00698 
00699         if (centers_length<branching) {
00700             node->indices = dsindices;
00701             std::sort(node->indices,node->indices+indices_length);
00702             node->childs = NULL;
00703             return;
00704         }
00705 
00706 
00707         //  assign points to clusters
00708         DistanceType cost;
00709         computeLabels(dsindices, indices_length, &centers[0], centers_length, &labels[0], cost);
00710 
00711         node->childs = pool.allocate<NodePtr>(branching);
00712         int start = 0;
00713         int end = start;
00714         for (int i=0; i<branching; ++i) {
00715             for (int j=0; j<indices_length; ++j) {
00716                 if (labels[j]==i) {
00717                     std::swap(dsindices[j],dsindices[end]);
00718                     std::swap(labels[j],labels[end]);
00719                     end++;
00720                 }
00721             }
00722 
00723             node->childs[i] = pool.allocate<Node>();
00724             node->childs[i]->pivot = centers[i];
00725             node->childs[i]->indices = NULL;
00726             computeClustering(node->childs[i],dsindices+start, end-start, branching, level+1);
00727             start=end;
00728         }
00729     }
00730 
00731 
00732 
00733     /**
00734      * Performs one descent in the hierarchical k-means tree. The branches not
00735      * visited are stored in a priority queue.
00736      *
00737      * Params:
00738      *      node = node to explore
00739      *      result = container for the k-nearest neighbors found
00740      *      vec = query points
00741      *      checks = how many points in the dataset have been checked so far
00742      *      maxChecks = maximum dataset points to checks
00743      */
00744 
00745 
00746     void findNN(NodePtr node, ResultSet<DistanceType>& result, const ElementType* vec, int& checks, int maxChecks,
00747                 Heap<BranchSt>* heap, std::vector<bool>& checked)
00748     {
00749         if (node->childs==NULL) {
00750             if (checks>=maxChecks) {
00751                 if (result.full()) return;
00752             }
00753             for (int i=0; i<node->size; ++i) {
00754                 int index = node->indices[i];
00755                 if (!checked[index]) {
00756                     DistanceType dist = distance(dataset[index], vec, veclen_);
00757                     result.addPoint(dist, index);
00758                     checked[index] = true;
00759                     ++checks;
00760                 }
00761             }
00762         }
00763         else {
00764             DistanceType* domain_distances = new DistanceType[branching_];
00765             int best_index = 0;
00766             domain_distances[best_index] = distance(vec, dataset[node->childs[best_index]->pivot], veclen_);
00767             for (int i=1; i<branching_; ++i) {
00768                 domain_distances[i] = distance(vec, dataset[node->childs[i]->pivot], veclen_);
00769                 if (domain_distances[i]<domain_distances[best_index]) {
00770                     best_index = i;
00771                 }
00772             }
00773             for (int i=0; i<branching_; ++i) {
00774                 if (i!=best_index) {
00775                     heap->insert(BranchSt(node->childs[i],domain_distances[i]));
00776                 }
00777             }
00778             delete[] domain_distances;
00779             findNN(node->childs[best_index],result,vec, checks, maxChecks, heap, checked);
00780         }
00781     }
00782 
00783 private:
00784 
00785 
00786     /**
00787      * The dataset used by this index
00788      */
00789     const Matrix<ElementType> dataset;
00790 
00791     /**
00792      * Parameters used by this index
00793      */
00794     IndexParams params;
00795 
00796 
00797     /**
00798      * Number of features in the dataset.
00799      */
00800     size_t size_;
00801 
00802     /**
00803      * Length of each feature.
00804      */
00805     size_t veclen_;
00806 
00807     /**
00808      * The root node in the tree.
00809      */
00810     NodePtr* root;
00811 
00812     /**
00813      *  Array of indices to vectors in the dataset.
00814      */
00815     int** indices;
00816 
00817 
00818     /**
00819      * The distance
00820      */
00821     Distance distance;
00822 
00823     /**
00824      * Pooled memory allocator.
00825      *
00826      * Using a pooled memory allocator is more efficient
00827      * than allocating memory directly when there is a large
00828      * number small of memory allocations.
00829      */
00830     PooledAllocator pool;
00831 
00832     /**
00833      * Memory occupied by the index.
00834      */
00835     int memoryCounter;
00836 
00837     /** index parameters */
00838     int branching_;
00839     int trees_;
00840     flann_centers_init_t centers_init_;
00841     int leaf_size_;
00842 
00843 
00844 };
00845 
00846 }
00847 
00848 #endif /* OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_ */
00849
Repository toolbox

Export to desktop IDE
Build repository
Repository details

Type:	Program
Mbed OS support:	Mbed 2 deprecated
Created:	31 Mar 2016
Imports:	152
Forks:	0
Commits:	1
Dependents:	0
Dependencies:	1
Followers:	2
hierarchical_clustering_index.h

Repository toolbox

Repository details

Important Information for this Arm website

Access Warning