| 1 | /* | 
|---|
| 2 |  *    This program is free software; you can redistribute it and/or modify | 
|---|
| 3 |  *    it under the terms of the GNU General Public License as published by | 
|---|
| 4 |  *    the Free Software Foundation; either version 2 of the License, or | 
|---|
| 5 |  *    (at your option) any later version. | 
|---|
| 6 |  * | 
|---|
| 7 |  *    This program is distributed in the hope that it will be useful, | 
|---|
| 8 |  *    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 9 |  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 10 |  *    GNU General Public License for more details. | 
|---|
| 11 |  * | 
|---|
| 12 |  *    You should have received a copy of the GNU General Public License | 
|---|
| 13 |  *    along with this program; if not, write to the Free Software | 
|---|
| 14 |  *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 
|---|
| 15 |  */ | 
|---|
| 16 |  | 
|---|
| 17 | /* | 
|---|
| 18 |  * MiddleOutConstructor.java | 
|---|
| 19 |  * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand | 
|---|
| 20 |  */ | 
|---|
| 21 |  | 
|---|
| 22 | package weka.core.neighboursearch.balltrees; | 
|---|
| 23 |  | 
|---|
| 24 | import weka.core.Instance; | 
|---|
| 25 | import weka.core.DenseInstance; | 
|---|
| 26 | import weka.core.Instances; | 
|---|
| 27 | import weka.core.Option; | 
|---|
| 28 | import weka.core.Randomizable; | 
|---|
| 29 | import weka.core.RevisionHandler; | 
|---|
| 30 | import weka.core.RevisionUtils; | 
|---|
| 31 | import weka.core.TechnicalInformation; | 
|---|
| 32 | import weka.core.TechnicalInformationHandler; | 
|---|
| 33 | import weka.core.Utils; | 
|---|
| 34 | import weka.core.TechnicalInformation.Field; | 
|---|
| 35 | import weka.core.TechnicalInformation.Type; | 
|---|
| 36 |  | 
|---|
| 37 | import java.util.Enumeration; | 
|---|
| 38 | import java.util.Random; | 
|---|
| 39 | import java.util.Vector; | 
|---|
| 40 | import java.util.ArrayList; | 
|---|
| 41 | import java.util.Iterator; | 
|---|
| 42 |  | 
|---|
| 43 | import java.io.Serializable; | 
|---|
| 44 |  | 
|---|
| 45 | /** | 
|---|
| 46 |  <!-- globalinfo-start --> | 
|---|
| 47 |  * The class that builds a BallTree middle out.<br/> | 
|---|
| 48 |  * <br/> | 
|---|
| 49 |  * For more information see also:<br/> | 
|---|
| 50 |  * <br/> | 
|---|
| 51 |  * Andrew W. Moore: The Anchors Hierarchy: Using the Triangle Inequality to Survive High Dimensional Data. In: UAI '00: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence, San Francisco, CA, USA, 397-405, 2000.<br/> | 
|---|
| 52 |  * <br/> | 
|---|
| 53 |  * Ashraf Masood Kibriya (2007). Fast Algorithms for Nearest Neighbour Search. Hamilton, New Zealand. | 
|---|
| 54 |  * <p/> | 
|---|
| 55 |  <!-- globalinfo-end --> | 
|---|
| 56 |  * | 
|---|
| 57 |  <!-- technical-bibtex-start --> | 
|---|
| 58 |  * BibTeX: | 
|---|
| 59 |  * <pre> | 
|---|
| 60 |  * @inproceedings{Moore2000, | 
|---|
| 61 |  *    address = {San Francisco, CA, USA}, | 
|---|
| 62 |  *    author = {Andrew W. Moore}, | 
|---|
| 63 |  *    booktitle = {UAI '00: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence}, | 
|---|
| 64 |  *    pages = {397-405}, | 
|---|
| 65 |  *    publisher = {Morgan Kaufmann Publishers Inc.}, | 
|---|
| 66 |  *    title = {The Anchors Hierarchy: Using the Triangle Inequality to Survive High Dimensional Data}, | 
|---|
| 67 |  *    year = {2000} | 
|---|
| 68 |  * } | 
|---|
| 69 |  *  | 
|---|
| 70 |  * @mastersthesis{Kibriya2007, | 
|---|
| 71 |  *    address = {Hamilton, New Zealand}, | 
|---|
| 72 |  *    author = {Ashraf Masood Kibriya}, | 
|---|
| 73 |  *    school = {Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato}, | 
|---|
| 74 |  *    title = {Fast Algorithms for Nearest Neighbour Search}, | 
|---|
| 75 |  *    year = {2007} | 
|---|
| 76 |  * } | 
|---|
| 77 |  * </pre> | 
|---|
| 78 |  * <p/> | 
|---|
| 79 |  <!-- technical-bibtex-end --> | 
|---|
| 80 |  * | 
|---|
| 81 |  <!-- options-start --> | 
|---|
| 82 |  * Valid options are: <p/> | 
|---|
| 83 |  *  | 
|---|
| 84 |  * <pre> -S <num> | 
|---|
| 85 |  *  The seed for the random number generator used | 
|---|
| 86 |  *  in selecting random anchor. | 
|---|
| 87 |  * (default: 1)</pre> | 
|---|
| 88 |  *  | 
|---|
| 89 |  * <pre> -R | 
|---|
| 90 |  *  Use randomly chosen initial anchors.</pre> | 
|---|
| 91 |  *  | 
|---|
| 92 |  <!-- options-end -->  | 
|---|
| 93 |  *  | 
|---|
| 94 |  * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) | 
|---|
| 95 |  * @version $Revision: 5987 $ | 
|---|
| 96 |  */ | 
|---|
| 97 | public class MiddleOutConstructor | 
|---|
| 98 |   extends BallTreeConstructor | 
|---|
| 99 |   implements Randomizable, TechnicalInformationHandler { | 
|---|
| 100 |  | 
|---|
| 101 |   /** for serialization. */ | 
|---|
| 102 |   private static final long serialVersionUID = -8523314263062524462L; | 
|---|
| 103 |  | 
|---|
| 104 |   /** Seed form random number generator. */ | 
|---|
| 105 |   protected int m_RSeed = 1; | 
|---|
| 106 |          | 
|---|
| 107 |   /**  | 
|---|
| 108 |    * The random number generator for selecting  | 
|---|
| 109 |    * the first anchor point randomly  | 
|---|
| 110 |    * (if selecting randomly). | 
|---|
| 111 |    */ | 
|---|
| 112 |   protected Random rand = new Random(m_RSeed); | 
|---|
| 113 |   | 
|---|
| 114 |   /** | 
|---|
| 115 |    * The radius of the smallest ball enclosing all the data points. | 
|---|
| 116 |    */ | 
|---|
| 117 |   private double rootRadius = -1; | 
|---|
| 118 |    | 
|---|
| 119 |   /**  | 
|---|
| 120 |    * True if the initial anchor is chosen randomly. False if it is the furthest | 
|---|
| 121 |    * point from the mean/centroid. | 
|---|
| 122 |    */ | 
|---|
| 123 |   protected boolean m_RandomInitialAnchor = true; | 
|---|
| 124 |  | 
|---|
| 125 |   /** | 
|---|
| 126 |    * Creates a new instance of MiddleOutConstructor. | 
|---|
| 127 |    */ | 
|---|
| 128 |   public MiddleOutConstructor() { | 
|---|
| 129 |   } | 
|---|
| 130 |  | 
|---|
| 131 |   /** | 
|---|
| 132 |    * Returns a string describing this nearest neighbour search algorithm. | 
|---|
| 133 |    *  | 
|---|
| 134 |    * @return            a description of the algorithm for displaying in the | 
|---|
| 135 |    *                    explorer/experimenter gui | 
|---|
| 136 |    */ | 
|---|
| 137 |   public String globalInfo() { | 
|---|
| 138 |     return  | 
|---|
| 139 |         "The class that builds a BallTree middle out.\n\n" | 
|---|
| 140 |       + "For more information see also:\n\n" | 
|---|
| 141 |       + getTechnicalInformation().toString(); | 
|---|
| 142 |   } | 
|---|
| 143 |      | 
|---|
| 144 |   /** | 
|---|
| 145 |    * Returns an instance of a TechnicalInformation object, containing detailed | 
|---|
| 146 |    * information about the technical background of this class, e.g., paper | 
|---|
| 147 |    * reference or book this class is based on. | 
|---|
| 148 |    *  | 
|---|
| 149 |    * @return            the technical information about this class | 
|---|
| 150 |    */ | 
|---|
| 151 |   public TechnicalInformation getTechnicalInformation() { | 
|---|
| 152 |     TechnicalInformation result; | 
|---|
| 153 |     TechnicalInformation additional; | 
|---|
| 154 |  | 
|---|
| 155 |     result = new TechnicalInformation(Type.INPROCEEDINGS); | 
|---|
| 156 |     result.setValue(Field.AUTHOR, "Andrew W. Moore"); | 
|---|
| 157 |     result.setValue(Field.TITLE, "The Anchors Hierarchy: Using the Triangle Inequality to Survive High Dimensional Data"); | 
|---|
| 158 |     result.setValue(Field.YEAR, "2000"); | 
|---|
| 159 |     result.setValue(Field.BOOKTITLE, "UAI '00: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence"); | 
|---|
| 160 |     result.setValue(Field.PAGES, "397-405"); | 
|---|
| 161 |     result.setValue(Field.PUBLISHER, "Morgan Kaufmann Publishers Inc."); | 
|---|
| 162 |     result.setValue(Field.ADDRESS, "San Francisco, CA, USA"); | 
|---|
| 163 |  | 
|---|
| 164 |     additional = result.add(Type.MASTERSTHESIS); | 
|---|
| 165 |     additional.setValue(Field.AUTHOR, "Ashraf Masood Kibriya"); | 
|---|
| 166 |     additional.setValue(Field.TITLE, "Fast Algorithms for Nearest Neighbour Search"); | 
|---|
| 167 |     additional.setValue(Field.YEAR, "2007"); | 
|---|
| 168 |     additional.setValue(Field.SCHOOL, "Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato"); | 
|---|
| 169 |     additional.setValue(Field.ADDRESS, "Hamilton, New Zealand"); | 
|---|
| 170 |      | 
|---|
| 171 |     return result; | 
|---|
| 172 |   } | 
|---|
| 173 |  | 
|---|
| 174 |   /** | 
|---|
| 175 |    * Builds a ball tree middle out.  | 
|---|
| 176 |    * @return The root node of the tree.  | 
|---|
| 177 |    * @throws Exception If there is problem building | 
|---|
| 178 |    * the tree. | 
|---|
| 179 |    */ | 
|---|
| 180 |   public BallNode buildTree() throws Exception { | 
|---|
| 181 |     m_NumNodes = m_MaxDepth = m_NumLeaves = 0; | 
|---|
| 182 |     if(rootRadius == -1) { | 
|---|
| 183 |       rootRadius = BallNode.calcRadius(m_InstList, m_Instances,  | 
|---|
| 184 |                                 BallNode.calcCentroidPivot(m_InstList, m_Instances), | 
|---|
| 185 |                                 m_DistanceFunction); | 
|---|
| 186 |     } | 
|---|
| 187 |     BallNode root = buildTreeMiddleOut(0, m_Instances.numInstances()-1); | 
|---|
| 188 |     return root; | 
|---|
| 189 |   } | 
|---|
| 190 |  | 
|---|
| 191 |   /**  | 
|---|
| 192 |    * Builds a ball tree middle out from the  | 
|---|
| 193 |    * portion of the master index array given | 
|---|
| 194 |    * by supplied start and end index. | 
|---|
| 195 |    * @param startIdx The start of the portion | 
|---|
| 196 |    * in master index array. | 
|---|
| 197 |    * @param endIdx the end of the portion in  | 
|---|
| 198 |    * master index array. | 
|---|
| 199 |    * @return The root node of the built tree. | 
|---|
| 200 |    * @throws Exception If there is some  | 
|---|
| 201 |    * problem building the tree.  | 
|---|
| 202 |    */ | 
|---|
| 203 |   protected BallNode buildTreeMiddleOut(int startIdx, int endIdx)  | 
|---|
| 204 |     throws Exception { | 
|---|
| 205 |          | 
|---|
| 206 |     Instance pivot; | 
|---|
| 207 |     double radius; | 
|---|
| 208 |     Vector<TempNode> anchors; | 
|---|
| 209 |     int numInsts = endIdx - startIdx + 1; | 
|---|
| 210 |     int numAnchors = (int) Math.round(Math.sqrt(numInsts)); | 
|---|
| 211 |      | 
|---|
| 212 |     //create anchor's hierarchy | 
|---|
| 213 |     if (numAnchors > 1) { | 
|---|
| 214 |       pivot = BallNode.calcCentroidPivot(startIdx, endIdx, m_InstList,m_Instances); | 
|---|
| 215 |       radius = BallNode.calcRadius(startIdx, endIdx, m_InstList, m_Instances,  | 
|---|
| 216 |                                                            pivot, m_DistanceFunction);       | 
|---|
| 217 |       if(numInsts <= m_MaxInstancesInLeaf ||  | 
|---|
| 218 |          (rootRadius==0 ? true : radius/rootRadius < m_MaxRelLeafRadius)) { //just make a leaf don't make anchors hierarchy  | 
|---|
| 219 |                 BallNode node = new BallNode(startIdx, endIdx, m_NumNodes,pivot, radius); | 
|---|
| 220 |                 return node; | 
|---|
| 221 |           } | 
|---|
| 222 |       anchors = new Vector<TempNode>(numAnchors); | 
|---|
| 223 |       createAnchorsHierarchy(anchors, numAnchors, startIdx, endIdx); | 
|---|
| 224 |  | 
|---|
| 225 |       BallNode node = mergeNodes(anchors, startIdx, endIdx); | 
|---|
| 226 |        | 
|---|
| 227 |       buildLeavesMiddleOut(node); | 
|---|
| 228 |        | 
|---|
| 229 |       return node; | 
|---|
| 230 |     }// end anchors hierarchy | 
|---|
| 231 |     else { | 
|---|
| 232 |       BallNode node = new BallNode(startIdx, endIdx, m_NumNodes,  | 
|---|
| 233 |                       (pivot=BallNode.calcCentroidPivot(startIdx, endIdx,  | 
|---|
| 234 |                                                       m_InstList, m_Instances)),  | 
|---|
| 235 |                       BallNode.calcRadius(startIdx, endIdx, m_InstList,  | 
|---|
| 236 |                                           m_Instances, pivot,  | 
|---|
| 237 |                                           m_DistanceFunction) | 
|---|
| 238 |                          ); | 
|---|
| 239 |       return node; | 
|---|
| 240 |     }         | 
|---|
| 241 |   } | 
|---|
| 242 |    | 
|---|
| 243 |   /** | 
|---|
| 244 |    * Creates an anchors hierarchy from a portion | 
|---|
| 245 |    * of master index array. | 
|---|
| 246 |    *  | 
|---|
| 247 |    * @param anchors The vector for putting the anchors | 
|---|
| 248 |    * into.  | 
|---|
| 249 |    * @param numAnchors The number of anchors to create. | 
|---|
| 250 |    * @param startIdx The start of the portion of master | 
|---|
| 251 |    * index array. | 
|---|
| 252 |    * @param endIdx The end of the portion of master  | 
|---|
| 253 |    * index array. | 
|---|
| 254 |    * @throws Exception If there is some problem in creating  | 
|---|
| 255 |    * the hierarchy. | 
|---|
| 256 |    */ | 
|---|
| 257 |   protected void createAnchorsHierarchy(Vector<TempNode> anchors, final int numAnchors,  | 
|---|
| 258 |       final int startIdx, final int endIdx)  | 
|---|
| 259 |     throws Exception { | 
|---|
| 260 |      | 
|---|
| 261 |     TempNode anchr1 = m_RandomInitialAnchor ?  | 
|---|
| 262 |             getRandomAnchor(startIdx, endIdx) :  | 
|---|
| 263 |             getFurthestFromMeanAnchor(startIdx, endIdx); | 
|---|
| 264 |                        | 
|---|
| 265 |     TempNode amax = anchr1; //double maxradius = anchr1.radius; | 
|---|
| 266 |     TempNode newAnchor; | 
|---|
| 267 |     Vector<double[]> anchorDistances = new Vector<double[]>(numAnchors-1); | 
|---|
| 268 |     anchors.add(anchr1); | 
|---|
| 269 |  | 
|---|
| 270 |     //creating anchors | 
|---|
| 271 |     while(anchors.size() < numAnchors) { | 
|---|
| 272 |       //create new anchor | 
|---|
| 273 |       newAnchor = new TempNode(); | 
|---|
| 274 |       newAnchor.points = new MyIdxList();         | 
|---|
| 275 |       Instance newpivot = m_Instances.instance(((ListNode)amax.points.getFirst()).idx); | 
|---|
| 276 |       newAnchor.anchor = newpivot; | 
|---|
| 277 |       newAnchor.idx = ((ListNode)amax.points.getFirst()).idx; | 
|---|
| 278 |  | 
|---|
| 279 |       setInterAnchorDistances(anchors, newAnchor, anchorDistances); | 
|---|
| 280 |       if(stealPoints(newAnchor, anchors, anchorDistances)) //if points stolen | 
|---|
| 281 |         newAnchor.radius = ((ListNode)newAnchor.points.getFirst()).distance; | 
|---|
| 282 |       else | 
|---|
| 283 |         newAnchor.radius = 0.0; | 
|---|
| 284 |       anchors.add(newAnchor); | 
|---|
| 285 |  | 
|---|
| 286 |       //find new amax         | 
|---|
| 287 |       amax = (TempNode)anchors.elementAt(0); | 
|---|
| 288 |       for(int i=1; i<anchors.size(); i++) { | 
|---|
| 289 |         newAnchor = (TempNode)anchors.elementAt(i); | 
|---|
| 290 |         if(newAnchor.radius > amax.radius) | 
|---|
| 291 |           amax = newAnchor; | 
|---|
| 292 |       }//end for | 
|---|
| 293 |     }//end while | 
|---|
| 294 |   } | 
|---|
| 295 |    | 
|---|
| 296 |   /** | 
|---|
| 297 |    * Applies the middle out build procedure to  | 
|---|
| 298 |    * the leaves of the tree. The leaf nodes  | 
|---|
| 299 |    * should be the ones that were created by  | 
|---|
| 300 |    * createAnchorsHierarchy(). The process | 
|---|
| 301 |    * continues recursively for the leaves  | 
|---|
| 302 |    * created for each leaf of the given tree  | 
|---|
| 303 |    * until for some leaf node <=  | 
|---|
| 304 |    * m_MaxInstancesInLeaf instances remain | 
|---|
| 305 |    * in the leaf. | 
|---|
| 306 |    *  | 
|---|
| 307 |    * @param node The root of the tree. | 
|---|
| 308 |    * @throws Exception If there is some problem | 
|---|
| 309 |    * in building the tree leaves. | 
|---|
| 310 |    */ | 
|---|
| 311 |   protected void buildLeavesMiddleOut(BallNode node) throws Exception { | 
|---|
| 312 |     if(node.m_Left!=null && node.m_Right!=null) { //if an internal node | 
|---|
| 313 |       buildLeavesMiddleOut(node.m_Left); | 
|---|
| 314 |       buildLeavesMiddleOut(node.m_Right); | 
|---|
| 315 |     } | 
|---|
| 316 |     else if(node.m_Left!=null || node.m_Right!=null) { | 
|---|
| 317 |       throw new Exception("Invalid leaf assignment. Please check code"); | 
|---|
| 318 |     } | 
|---|
| 319 |     else { //if node is a leaf | 
|---|
| 320 |       BallNode n2 = buildTreeMiddleOut(node.m_Start, node.m_End); | 
|---|
| 321 |       if(n2.m_Left!=null && n2.m_Right!=null) { | 
|---|
| 322 |         node.m_Left = n2.m_Left; | 
|---|
| 323 |         node.m_Right = n2.m_Right; | 
|---|
| 324 |         buildLeavesMiddleOut(node);  | 
|---|
| 325 |         //the stopping condition in buildTreeMiddleOut will stop the recursion, | 
|---|
| 326 |         //where it won't split a node at all, and we won't recurse here. | 
|---|
| 327 |       } | 
|---|
| 328 |       else if(n2.m_Left!=null || n2.m_Right!=null) | 
|---|
| 329 |         throw new Exception("Invalid leaf assignment. Please check code"); | 
|---|
| 330 |     } | 
|---|
| 331 |   } | 
|---|
| 332 |  | 
|---|
| 333 |   /** | 
|---|
| 334 |    * Merges nodes created by createAnchorsHierarchy() | 
|---|
| 335 |    * into one top node. | 
|---|
| 336 |    *  | 
|---|
| 337 |    * @param list List of anchor nodes. | 
|---|
| 338 |    * @param startIdx The start of the portion of  | 
|---|
| 339 |    * master index array containing these anchor  | 
|---|
| 340 |    * nodes.  | 
|---|
| 341 |    * @param endIdx The end of the portion of master  | 
|---|
| 342 |    * index array containing these anchor nodes.  | 
|---|
| 343 |    * @return The top/root node after merging  | 
|---|
| 344 |    * the given anchor nodes. | 
|---|
| 345 |    * @throws Exception IF there is some problem in | 
|---|
| 346 |    * merging. | 
|---|
| 347 |    */ | 
|---|
| 348 |   protected BallNode mergeNodes(Vector<TempNode> list, int startIdx, int endIdx) | 
|---|
| 349 |     throws Exception { | 
|---|
| 350 |      | 
|---|
| 351 |     for(int i=0; i<list.size(); i++) { | 
|---|
| 352 |       TempNode n = (TempNode) list.get(i); | 
|---|
| 353 |       n.anchor = calcPivot(n.points, new MyIdxList(), m_Instances); | 
|---|
| 354 |       n.radius = calcRadius(n.points, new MyIdxList(), n.anchor, m_Instances); | 
|---|
| 355 |     } | 
|---|
| 356 |     double minRadius, tmpRadius; //tmpVolume, minVolume; | 
|---|
| 357 |     Instance pivot, minPivot=null; | 
|---|
| 358 |     TempNode parent; int min1=-1, min2=-1;     | 
|---|
| 359 |      | 
|---|
| 360 |     while(list.size() > 1) { //main merging loop | 
|---|
| 361 |       minRadius=Double.POSITIVE_INFINITY;       | 
|---|
| 362 |        | 
|---|
| 363 |       for(int i=0; i<list.size(); i++) { | 
|---|
| 364 |         TempNode first = (TempNode) list.get(i); | 
|---|
| 365 |         for(int j=i+1; j<list.size(); j++) { | 
|---|
| 366 |           TempNode second = (TempNode) list.get(j); | 
|---|
| 367 |           pivot = calcPivot(first, second, m_Instances);  | 
|---|
| 368 |           tmpRadius = calcRadius(first, second); //calcRadius(first.points, second.points, pivot, m_Instances);     | 
|---|
| 369 |           if(tmpRadius < minRadius) { //(tmpVolume < minVolume) { | 
|---|
| 370 |             minRadius = tmpRadius; //minVolume = tmpVolume;  | 
|---|
| 371 |             minPivot = pivot;  | 
|---|
| 372 |             min1=i; min2=j;  | 
|---|
| 373 |             //minInstList = tmpInstList; | 
|---|
| 374 |           } | 
|---|
| 375 |         }//end for(j) | 
|---|
| 376 |       }//end for(i) | 
|---|
| 377 |       parent = new TempNode(); | 
|---|
| 378 |       parent.left  = (TempNode) list.get(min1); | 
|---|
| 379 |       parent.right = (TempNode) list.get(min2); | 
|---|
| 380 |       parent.anchor = minPivot; | 
|---|
| 381 |       parent.radius = calcRadius(parent.left.points, parent.right.points, minPivot, m_Instances); //minRadius; | 
|---|
| 382 |       parent.points = parent.left.points.append(parent.left.points, parent.right.points); | 
|---|
| 383 |       list.remove(min1); list.remove(min2-1); | 
|---|
| 384 |       list.add(parent); | 
|---|
| 385 |     }//end while | 
|---|
| 386 |     TempNode tmpRoot = (TempNode)list.get(list.size()-1); | 
|---|
| 387 |      | 
|---|
| 388 |     if((endIdx-startIdx+1)!= tmpRoot.points.length()) { | 
|---|
| 389 |       throw new Exception("Root nodes instance list is of irregular length. " + | 
|---|
| 390 |                           "Please check code. Length should " + | 
|---|
| 391 |                           "be: " + (endIdx-startIdx+1) +  | 
|---|
| 392 |                           " whereas it is found to be: "+tmpRoot.points.length()); | 
|---|
| 393 |     } | 
|---|
| 394 |     for(int i=0; i<tmpRoot.points.length(); i++) { | 
|---|
| 395 |       m_InstList[startIdx+i] = ((ListNode)tmpRoot.points.get(i)).idx; | 
|---|
| 396 |     } | 
|---|
| 397 |      | 
|---|
| 398 |     BallNode node = makeBallTreeNodes(tmpRoot, startIdx, endIdx, 0); | 
|---|
| 399 |      | 
|---|
| 400 |     return node;     | 
|---|
| 401 |   } | 
|---|
| 402 |    | 
|---|
| 403 |   /** | 
|---|
| 404 |    * Makes BallTreeNodes out of TempNodes. | 
|---|
| 405 |    *   | 
|---|
| 406 |    * @param node The root TempNode | 
|---|
| 407 |    * @param startidx The start of the portion of  | 
|---|
| 408 |    * master index array the TempNodes  | 
|---|
| 409 |    * are made from.  | 
|---|
| 410 |    * @param endidx The end of the portion of  | 
|---|
| 411 |    * master index array the TempNodes are  | 
|---|
| 412 |    * made from.  | 
|---|
| 413 |    * @param depth The depth in the tree where  | 
|---|
| 414 |    * this root TempNode is made (needed when  | 
|---|
| 415 |    * leaves of a tree deeper down are built  | 
|---|
| 416 |    * middle out). | 
|---|
| 417 |    * @return The root BallTreeNode. | 
|---|
| 418 |    */ | 
|---|
| 419 |   protected BallNode makeBallTreeNodes(TempNode node, int startidx,  | 
|---|
| 420 |       int endidx, int depth) { | 
|---|
| 421 |     BallNode ball=null; | 
|---|
| 422 |      | 
|---|
| 423 |     if(node.left!=null && node.right!=null) { //make an internal node | 
|---|
| 424 |       ball = new BallNode( | 
|---|
| 425 |       startidx, endidx, m_NumNodes,  | 
|---|
| 426 |       node.anchor, | 
|---|
| 427 |       node.radius | 
|---|
| 428 |       ); | 
|---|
| 429 |       m_NumNodes += 1; | 
|---|
| 430 |       ball.m_Left = makeBallTreeNodes(node.left, startidx, startidx+node.left.points.length()-1, depth+1); | 
|---|
| 431 |       ball.m_Right= makeBallTreeNodes(node.right, startidx+node.left.points.length(), endidx, depth+1); | 
|---|
| 432 |       m_MaxDepth++; | 
|---|
| 433 |     } | 
|---|
| 434 |     else { //make a leaf node | 
|---|
| 435 |       ball = new BallNode(startidx, endidx, m_NumNodes,        | 
|---|
| 436 |       node.anchor,  | 
|---|
| 437 |       node.radius  | 
|---|
| 438 |                          ); | 
|---|
| 439 |       m_NumNodes += 1;       | 
|---|
| 440 |       m_NumLeaves += 1; | 
|---|
| 441 |     } | 
|---|
| 442 |     return ball; | 
|---|
| 443 |   } | 
|---|
| 444 |      | 
|---|
| 445 |   /** | 
|---|
| 446 |    * Returns an anchor point which is furthest from the | 
|---|
| 447 |    * mean point for a given set of points (instances)  | 
|---|
| 448 |    * (The anchor instance is chosen from the given | 
|---|
| 449 |    * set of points). | 
|---|
| 450 |    *  | 
|---|
| 451 |    * @param startIdx The start index of the points | 
|---|
| 452 |    * for which anchor point is required. | 
|---|
| 453 |    * @param endIdx The end index of the points for | 
|---|
| 454 |    * which anchor point is required. | 
|---|
| 455 |    * @return The furthest point/instance from the mean  | 
|---|
| 456 |    * of given set of points. | 
|---|
| 457 |    */ | 
|---|
| 458 |   protected TempNode getFurthestFromMeanAnchor(int startIdx, int endIdx) { | 
|---|
| 459 |     TempNode anchor = new TempNode(); | 
|---|
| 460 |     Instance centroid = BallNode.calcCentroidPivot(startIdx, endIdx, m_InstList,  | 
|---|
| 461 |                                                    m_Instances); | 
|---|
| 462 |     Instance temp; | 
|---|
| 463 |     double tmpr; | 
|---|
| 464 |     anchor.radius = Double.NEGATIVE_INFINITY; | 
|---|
| 465 |     for(int i=startIdx; i<=endIdx; i++) { | 
|---|
| 466 |       temp = m_Instances.instance(m_InstList[i]); | 
|---|
| 467 |       tmpr = m_DistanceFunction.distance(centroid, temp); | 
|---|
| 468 |       if(tmpr > anchor.radius) { | 
|---|
| 469 |         anchor.idx = m_InstList[i]; | 
|---|
| 470 |         anchor.anchor = temp; | 
|---|
| 471 |         anchor.radius = tmpr; | 
|---|
| 472 |       } | 
|---|
| 473 |     } | 
|---|
| 474 |      | 
|---|
| 475 |     setPoints(anchor, startIdx, endIdx, m_InstList); | 
|---|
| 476 |     return anchor; | 
|---|
| 477 |   } | 
|---|
| 478 |    | 
|---|
| 479 |   /**  | 
|---|
| 480 |    * Returns a random anchor point/instance from a  | 
|---|
| 481 |    * given set of points/instances. | 
|---|
| 482 |    *  | 
|---|
| 483 |    * @param startIdx The start index of the points | 
|---|
| 484 |    * for which anchor is required. | 
|---|
| 485 |    * @param endIdx The end index of the points for | 
|---|
| 486 |    * which anchor is required. | 
|---|
| 487 |    * @return The random anchor point/instance | 
|---|
| 488 |    * for the given set of  | 
|---|
| 489 |    */ | 
|---|
| 490 |   protected TempNode getRandomAnchor(int startIdx, int endIdx) { | 
|---|
| 491 |     TempNode anchr1 = new TempNode(); | 
|---|
| 492 |     anchr1.idx = m_InstList[startIdx+rand.nextInt((endIdx-startIdx+1))]; | 
|---|
| 493 |     anchr1.anchor = m_Instances.instance(anchr1.idx); | 
|---|
| 494 |     setPoints(anchr1, startIdx, endIdx, m_InstList); | 
|---|
| 495 |     anchr1.radius = ((ListNode)anchr1.points.getFirst()).distance; | 
|---|
| 496 |      | 
|---|
| 497 |     return anchr1; | 
|---|
| 498 |   } | 
|---|
| 499 |    | 
|---|
| 500 |   /** | 
|---|
| 501 |    * Sets the points of an anchor node. It takes the | 
|---|
| 502 |    * indices of points from the given portion of  | 
|---|
| 503 |    * an index array and stores those indices, together | 
|---|
| 504 |    * with their distances to the given anchor node,  | 
|---|
| 505 |    * in the point index list of the anchor node. | 
|---|
| 506 |    *   | 
|---|
| 507 |    * @param node The node in which the points are | 
|---|
| 508 |    * needed to be set. | 
|---|
| 509 |    * @param startIdx The start of the portion in  | 
|---|
| 510 |    * the given index array (the master index | 
|---|
| 511 |    * array). | 
|---|
| 512 |    * @param endIdx The end of the portion in the | 
|---|
| 513 |    * given index array.  | 
|---|
| 514 |    * @param indices The index array. | 
|---|
| 515 |    */ | 
|---|
| 516 |   public void setPoints(TempNode node, int startIdx, int endIdx, int[] indices) { | 
|---|
| 517 |     node.points = new MyIdxList();     | 
|---|
| 518 |     Instance temp; double dist; | 
|---|
| 519 |     for(int i=startIdx; i<=endIdx; i++) { | 
|---|
| 520 |       temp = m_Instances.instance(indices[i]); | 
|---|
| 521 |       dist = m_DistanceFunction.distance(node.anchor, temp); | 
|---|
| 522 |       node.points.insertReverseSorted(indices[i], dist); | 
|---|
| 523 |     } | 
|---|
| 524 |   } | 
|---|
| 525 |  | 
|---|
| 526 |   /** | 
|---|
| 527 |    * Sets the distances of a supplied new | 
|---|
| 528 |    * anchor to all the rest of the  | 
|---|
| 529 |    * previous anchor points. | 
|---|
| 530 |    * @param anchors The old anchor points. | 
|---|
| 531 |    * @param newAnchor The new anchor point. | 
|---|
| 532 |    * @param anchorDistances The vector to | 
|---|
| 533 |    * store the distances of newAnchor to  | 
|---|
| 534 |    * each of the old anchors. | 
|---|
| 535 |    * @throws Exception If there is some  | 
|---|
| 536 |    * problem in calculating the distances. | 
|---|
| 537 |    */ | 
|---|
| 538 |   public void setInterAnchorDistances(Vector<TempNode> anchors, TempNode newAnchor, | 
|---|
| 539 |                                       Vector<double[]> anchorDistances) throws Exception { | 
|---|
| 540 |     double[] distArray = new double[anchors.size()]; | 
|---|
| 541 |      | 
|---|
| 542 |     for(int i=0; i<anchors.size(); i++) { | 
|---|
| 543 |       Instance anchr = ((TempNode)anchors.elementAt(i)).anchor; | 
|---|
| 544 |       distArray[i] = m_DistanceFunction.distance(anchr, newAnchor.anchor); | 
|---|
| 545 |     } | 
|---|
| 546 |     anchorDistances.add(distArray); | 
|---|
| 547 |   } | 
|---|
| 548 |    | 
|---|
| 549 |   /** | 
|---|
| 550 |    * Removes points from old anchors that | 
|---|
| 551 |    * are nearer to the given new anchor and | 
|---|
| 552 |    * adds them to the list of points of the | 
|---|
| 553 |    * new anchor.  | 
|---|
| 554 |    * @param newAnchor The new anchor. | 
|---|
| 555 |    * @param anchors The old anchors. | 
|---|
| 556 |    * @param anchorDistances The distances | 
|---|
| 557 |    * of new anchor to each of the old  | 
|---|
| 558 |    * anchors. | 
|---|
| 559 |    * @return true if any points are removed | 
|---|
| 560 |    * from the old anchors | 
|---|
| 561 |    */ | 
|---|
| 562 |   public boolean stealPoints(TempNode newAnchor, Vector anchors,  | 
|---|
| 563 |                           Vector anchorDistances) { | 
|---|
| 564 |                              | 
|---|
| 565 |     int maxIdx = -1;  | 
|---|
| 566 |     double maxDist = Double.NEGATIVE_INFINITY; | 
|---|
| 567 |     double[] distArray = (double[])anchorDistances.lastElement(); | 
|---|
| 568 |      | 
|---|
| 569 |     for(int i=0; i<distArray.length; i++) | 
|---|
| 570 |       if(maxDist < distArray[i]) { | 
|---|
| 571 |         maxDist = distArray[i]; maxIdx = i; | 
|---|
| 572 |       } | 
|---|
| 573 |      | 
|---|
| 574 |     boolean anyPointsStolen=false, pointsStolen=false; | 
|---|
| 575 |     TempNode anchorI; | 
|---|
| 576 |     double newDist, distI, interAnchMidDist; | 
|---|
| 577 |     Instance newAnchInst = newAnchor.anchor, anchIInst; | 
|---|
| 578 |     for(int i=0; i<anchors.size(); i++) { | 
|---|
| 579 |       anchorI = (TempNode)anchors.elementAt(i); | 
|---|
| 580 |       anchIInst = anchorI.anchor; | 
|---|
| 581 |        | 
|---|
| 582 |       pointsStolen = false; | 
|---|
| 583 |       interAnchMidDist = m_DistanceFunction.distance(newAnchInst, anchIInst)/2D; | 
|---|
| 584 |       for(int j=0; j<anchorI.points.length(); j++) { | 
|---|
| 585 |         ListNode tmp = (ListNode) anchorI.points.get(j); | 
|---|
| 586 |         //break if we reach a point whose distance is less than the midpoint | 
|---|
| 587 |         //of inter anchor distance | 
|---|
| 588 |         if(tmp.distance < interAnchMidDist) | 
|---|
| 589 |           break; | 
|---|
| 590 |         //else test if this point can be stolen by the new anchor | 
|---|
| 591 |         newDist = m_DistanceFunction.distance(newAnchInst,  | 
|---|
| 592 |                                               m_Instances.instance(tmp.idx)); | 
|---|
| 593 |         distI = tmp.distance; | 
|---|
| 594 |         if(newDist < distI) { | 
|---|
| 595 |           newAnchor.points.insertReverseSorted(tmp.idx, newDist); | 
|---|
| 596 |           anchorI.points.remove(j); | 
|---|
| 597 |           anyPointsStolen=pointsStolen=true; | 
|---|
| 598 |         } | 
|---|
| 599 |       } | 
|---|
| 600 |       if (pointsStolen) | 
|---|
| 601 |         anchorI.radius = ((ListNode)anchorI.points.getFirst()).distance; | 
|---|
| 602 |     }//end for | 
|---|
| 603 |     return anyPointsStolen; | 
|---|
| 604 |   }//end stealPoints() | 
|---|
| 605 |  | 
|---|
| 606 |   /** | 
|---|
| 607 |   /** | 
|---|
| 608 |    * Calculates the centroid pivot of a node based on its | 
|---|
| 609 |    * two child nodes (if merging two nodes). | 
|---|
| 610 |    * @param node1 The first child. | 
|---|
| 611 |    * @param node2 The second child. | 
|---|
| 612 |    * @param insts The set of instances on which the tree  | 
|---|
| 613 |    * is being built (as dataset header information is  | 
|---|
| 614 |    * required).  | 
|---|
| 615 |    * @return The centroid pivot of a node.  | 
|---|
| 616 |    */ | 
|---|
| 617 |   public Instance calcPivot(TempNode node1, TempNode node2, Instances insts) { | 
|---|
| 618 |     int classIdx = m_Instances.classIndex(); | 
|---|
| 619 |     double[] attrVals = new double[insts.numAttributes()]; | 
|---|
| 620 |     Instance temp; | 
|---|
| 621 |     double anchr1Ratio = (double)node1.points.length() /  | 
|---|
| 622 |                          (node1.points.length()+node2.points.length()), | 
|---|
| 623 |            anchr2Ratio = (double)node2.points.length() /  | 
|---|
| 624 |                          (node1.points.length()+node2.points.length());                         ; | 
|---|
| 625 |     for(int k=0; k<node1.anchor.numValues(); k++) { | 
|---|
| 626 |       if(node1.anchor.index(k)==classIdx) | 
|---|
| 627 |         continue; | 
|---|
| 628 |       attrVals[k] += node1.anchor.valueSparse(k)*anchr1Ratio; | 
|---|
| 629 |     } | 
|---|
| 630 |     for(int k=0; k<node2.anchor.numValues(); k++) { | 
|---|
| 631 |       if(node2.anchor.index(k)==classIdx) | 
|---|
| 632 |         continue; | 
|---|
| 633 |       attrVals[k] += node2.anchor.valueSparse(k)*anchr2Ratio; | 
|---|
| 634 |     } | 
|---|
| 635 |     temp = new DenseInstance(1.0, attrVals); | 
|---|
| 636 |     return temp; | 
|---|
| 637 |   } | 
|---|
| 638 |    | 
|---|
| 639 |   /** | 
|---|
| 640 |    * Calculates the centroid pivot of a node based on  | 
|---|
| 641 |    * the list of points that it  contains (tbe two  | 
|---|
| 642 |    * lists of its children are provided). | 
|---|
| 643 |    * @param list1 The point index list of first child. | 
|---|
| 644 |    * @param list2 The point index list of second  | 
|---|
| 645 |    * child. | 
|---|
| 646 |    * @param insts The insts object on which the tree  | 
|---|
| 647 |    * is being built (for header information).  | 
|---|
| 648 |    * @return The centroid pivot of the node.  | 
|---|
| 649 |    */ | 
|---|
| 650 |   public Instance calcPivot(MyIdxList list1, MyIdxList list2, Instances insts) { | 
|---|
| 651 |     int classIdx = m_Instances.classIndex(); | 
|---|
| 652 |     double[] attrVals = new double[insts.numAttributes()]; | 
|---|
| 653 |      | 
|---|
| 654 |     Instance temp; | 
|---|
| 655 |     for(int i=0; i<list1.length(); i++) { | 
|---|
| 656 |       temp = insts.instance(((ListNode)list1.get(i)).idx); | 
|---|
| 657 |       for(int k=0; k<temp.numValues(); k++) { | 
|---|
| 658 |         if(temp.index(k)==classIdx) | 
|---|
| 659 |           continue; | 
|---|
| 660 |         attrVals[k] += temp.valueSparse(k); | 
|---|
| 661 |       } | 
|---|
| 662 |     } | 
|---|
| 663 |     for(int j=0; j<list2.length(); j++) { | 
|---|
| 664 |       temp = insts.instance(((ListNode)list2.get(j)).idx); | 
|---|
| 665 |       for(int k=0; k<temp.numValues(); k++) { | 
|---|
| 666 |         if(temp.index(k)==classIdx) | 
|---|
| 667 |           continue; | 
|---|
| 668 |         attrVals[k] += temp.valueSparse(k); | 
|---|
| 669 |       } | 
|---|
| 670 |     } | 
|---|
| 671 |     for(int j=0, numInsts=list1.length()+list2.length();  | 
|---|
| 672 |         j < attrVals.length; j++) { | 
|---|
| 673 |       attrVals[j] /= numInsts; | 
|---|
| 674 |     } | 
|---|
| 675 |     temp = new DenseInstance(1.0, attrVals); | 
|---|
| 676 |     return temp; | 
|---|
| 677 |   } | 
|---|
| 678 |    | 
|---|
| 679 |   /**  | 
|---|
| 680 |    * Calculates the radius of a node based on its two  | 
|---|
| 681 |    * child nodes (if merging two nodes). | 
|---|
| 682 |    * @param n1 The first child of the node. | 
|---|
| 683 |    * @param n2 The second child of the node. | 
|---|
| 684 |    * @return The radius of the node.  | 
|---|
| 685 |    * @throws Exception | 
|---|
| 686 |    */ | 
|---|
| 687 |   public double calcRadius(TempNode n1, TempNode n2) { | 
|---|
| 688 |           Instance p1 = n1.anchor, p2 = n2.anchor; | 
|---|
| 689 |           double radius = n1.radius + m_DistanceFunction.distance(p1, p2) + n2.radius; | 
|---|
| 690 |           return radius/2; | 
|---|
| 691 |   } | 
|---|
| 692 |    | 
|---|
| 693 |   /** | 
|---|
| 694 |    * Calculates the radius of a node based on the | 
|---|
| 695 |    * list of points that it contains (the two lists of  | 
|---|
| 696 |    * its children are provided).  | 
|---|
| 697 |    * @param list1 The point index list of first child. | 
|---|
| 698 |    * @param list2 The point index list of second child. | 
|---|
| 699 |    * @param pivot The centre/pivot of the node. | 
|---|
| 700 |    * @param insts The instances on which the tree is  | 
|---|
| 701 |    * being built (for header info).  | 
|---|
| 702 |    * @return The radius of the node.  | 
|---|
| 703 |    */ | 
|---|
| 704 |   public double calcRadius(MyIdxList list1, MyIdxList list2,  | 
|---|
| 705 |                            Instance pivot, Instances insts) { | 
|---|
| 706 |     double radius = Double.NEGATIVE_INFINITY; | 
|---|
| 707 |      | 
|---|
| 708 |     for(int i=0; i<list1.length(); i++) { | 
|---|
| 709 |       double dist = m_DistanceFunction.distance(pivot,  | 
|---|
| 710 |                                               insts.instance(((ListNode)list1.get(i)).idx)); | 
|---|
| 711 |       if(dist>radius) | 
|---|
| 712 |         radius = dist; | 
|---|
| 713 |     } | 
|---|
| 714 |     for(int j=0; j<list2.length(); j++) { | 
|---|
| 715 |       double dist = m_DistanceFunction.distance(pivot,  | 
|---|
| 716 |                                               insts.instance(((ListNode)list2.get(j)).idx)); | 
|---|
| 717 |       if(dist>radius) | 
|---|
| 718 |         radius = dist; | 
|---|
| 719 |     } | 
|---|
| 720 |     return radius; | 
|---|
| 721 |   } | 
|---|
| 722 |    | 
|---|
| 723 |   /** | 
|---|
| 724 |    * Adds an instance to the tree. This implementation of  | 
|---|
| 725 |    * MiddleOutConstructor doesn't support addition of  | 
|---|
| 726 |    * instances to already built tree, hence it always | 
|---|
| 727 |    * throws an exception. | 
|---|
| 728 |    * @param node The root of the tree to which the  | 
|---|
| 729 |    * instance is to be added. | 
|---|
| 730 |    * @param inst The instance to add to the tree. | 
|---|
| 731 |    * @return The updated master index array after  | 
|---|
| 732 |    * adding the instance. | 
|---|
| 733 |    * @throws Exception Always as this implementation of | 
|---|
| 734 |    * MiddleOutConstructor doesn't support addition of | 
|---|
| 735 |    * instances after batch construction of the tree. | 
|---|
| 736 |    */ | 
|---|
| 737 |   public int[] addInstance(BallNode node, Instance inst) throws Exception { | 
|---|
| 738 |     throw new Exception("Addition of instances after the tree is built, not " + | 
|---|
| 739 |                         "possible with MiddleOutConstructor."); | 
|---|
| 740 |   } | 
|---|
| 741 |  | 
|---|
| 742 |   /** | 
|---|
| 743 |    * Sets the maximum number of instances allowed in a leaf. | 
|---|
| 744 |    * @param num The maximum number of instances allowed in  | 
|---|
| 745 |    * a leaf. | 
|---|
| 746 |    * @throws Exception If the num is < 2, as the method  | 
|---|
| 747 |    * cannot work for < 2 instances.  | 
|---|
| 748 |    */  | 
|---|
| 749 |   public void setMaxInstancesInLeaf(int num) throws Exception { | 
|---|
| 750 |     if(num<2) | 
|---|
| 751 |       throw new Exception("The maximum number of instances in a leaf for " + | 
|---|
| 752 |                           "using MiddleOutConstructor must be >=2."); | 
|---|
| 753 |     super.setMaxInstancesInLeaf(num); | 
|---|
| 754 |   }   | 
|---|
| 755 |  | 
|---|
| 756 |   /** | 
|---|
| 757 |    * Sets the instances on which the tree is to be built. | 
|---|
| 758 |    * @param insts The instances on which to build the  | 
|---|
| 759 |    * ball tree. | 
|---|
| 760 |    */ | 
|---|
| 761 |   public void setInstances(Instances insts) { | 
|---|
| 762 |     super.setInstances(insts); | 
|---|
| 763 |     rootRadius = -1; //this needs to be re-calculated by buildTree() | 
|---|
| 764 |   } | 
|---|
| 765 |    | 
|---|
| 766 |   /** | 
|---|
| 767 |    * Sets the master index array that points to  | 
|---|
| 768 |    * instances in m_Instances, so that only this array | 
|---|
| 769 |    * is manipulated, and m_Instances is left  | 
|---|
| 770 |    * untouched. | 
|---|
| 771 |    * @param instList The master index array. | 
|---|
| 772 |    */ | 
|---|
| 773 |   public void setInstanceList(int[] instList) { | 
|---|
| 774 |     super.setInstanceList(instList);  | 
|---|
| 775 |     rootRadius = -1; //this needs to be re-calculated by buildTree() | 
|---|
| 776 |   } | 
|---|
| 777 |    | 
|---|
| 778 |   /** | 
|---|
| 779 |    * Returns the tip text for this property. | 
|---|
| 780 |    *  | 
|---|
| 781 |    * @return            tip text for this property suitable for | 
|---|
| 782 |    *                    displaying in the explorer/experimenter gui | 
|---|
| 783 |    */ | 
|---|
| 784 |   public String initialAnchorRandomTipText() { | 
|---|
| 785 |     return "Whether the initial anchor is chosen randomly."; | 
|---|
| 786 |   } | 
|---|
| 787 |    | 
|---|
| 788 |   /**  | 
|---|
| 789 |    * Gets whether if the initial anchor is chosen randomly. | 
|---|
| 790 |    * @return true if the initial anchor is a random one.  | 
|---|
| 791 |    */ | 
|---|
| 792 |   public boolean isInitialAnchorRandom() { | 
|---|
| 793 |     return m_RandomInitialAnchor; | 
|---|
| 794 |   } | 
|---|
| 795 |    | 
|---|
| 796 |   /**  | 
|---|
| 797 |    * Sets whether if the initial anchor is chosen randomly. If not  | 
|---|
| 798 |    * then if it is the furthest point from the mean/centroid. | 
|---|
| 799 |    * @param randomInitialAnchor Should be true if the first  | 
|---|
| 800 |    * anchor is to be chosen randomly. | 
|---|
| 801 |    */ | 
|---|
| 802 |   public void setInitialAnchorRandom(boolean randomInitialAnchor) { | 
|---|
| 803 |     m_RandomInitialAnchor = randomInitialAnchor; | 
|---|
| 804 |   } | 
|---|
| 805 |    | 
|---|
| 806 |   /** | 
|---|
| 807 |    * Returns the tip text for this property. | 
|---|
| 808 |    *  | 
|---|
| 809 |    * @return tip text for this property suitable for | 
|---|
| 810 |    * displaying in the explorer/experimenter gui | 
|---|
| 811 |    */ | 
|---|
| 812 |   public String seedTipText() { | 
|---|
| 813 |     return "The seed value for the random number generator."; | 
|---|
| 814 |   } | 
|---|
| 815 |  | 
|---|
| 816 |   /** | 
|---|
| 817 |    * Returns the seed for random number generator. | 
|---|
| 818 |    * @return The random number seed. | 
|---|
| 819 |    */ | 
|---|
| 820 |   public int getSeed() { | 
|---|
| 821 |     return m_RSeed; | 
|---|
| 822 |   } | 
|---|
| 823 |    | 
|---|
| 824 |   /** | 
|---|
| 825 |    * Sets the seed for random number generator  | 
|---|
| 826 |    * (that is used for selecting the first anchor  | 
|---|
| 827 |    * point randomly). | 
|---|
| 828 |    * @param seed The seed.  | 
|---|
| 829 |    */ | 
|---|
| 830 |   public void setSeed(int seed) { | 
|---|
| 831 |     m_RSeed = seed; | 
|---|
| 832 |   } | 
|---|
| 833 |    | 
|---|
| 834 |   /** | 
|---|
| 835 |    * Returns an enumeration describing the available options. | 
|---|
| 836 |    *  | 
|---|
| 837 |    * @return            an enumeration of all the available options. | 
|---|
| 838 |    */ | 
|---|
| 839 |   public Enumeration listOptions() { | 
|---|
| 840 |     Vector<Option> newVector = new Vector<Option>(); | 
|---|
| 841 |      | 
|---|
| 842 |     newVector.addElement(new Option( | 
|---|
| 843 |         "\tThe seed for the random number generator used\n" | 
|---|
| 844 |         + "\tin selecting random anchor.\n" | 
|---|
| 845 |         + "(default: 1)",  | 
|---|
| 846 |         "S", 1, "-S <num>")); | 
|---|
| 847 |      | 
|---|
| 848 |     newVector.addElement(new Option( | 
|---|
| 849 |         "\tUse randomly chosen initial anchors.", | 
|---|
| 850 |         "R", 0, "-R")); | 
|---|
| 851 |      | 
|---|
| 852 |     return newVector.elements(); | 
|---|
| 853 |   } | 
|---|
| 854 |  | 
|---|
| 855 |   /** | 
|---|
| 856 |    * Parses a given list of options. | 
|---|
| 857 |    *  | 
|---|
| 858 |    <!-- options-start --> | 
|---|
| 859 |    * Valid options are: <p/> | 
|---|
| 860 |    *  | 
|---|
| 861 |    * <pre> -S <num> | 
|---|
| 862 |    *  The seed for the random number generator used | 
|---|
| 863 |    *  in selecting random anchor. | 
|---|
| 864 |    * (default: 1)</pre> | 
|---|
| 865 |    *  | 
|---|
| 866 |    * <pre> -R | 
|---|
| 867 |    *  Use randomly chosen initial anchors.</pre> | 
|---|
| 868 |    *  | 
|---|
| 869 |    <!-- options-end -->  | 
|---|
| 870 |    *  | 
|---|
| 871 |    * @param options     the list of options as an array of strings | 
|---|
| 872 |    * @throws Exception  if an option is not supported | 
|---|
| 873 |    **/ | 
|---|
| 874 |   public void setOptions(String[] options) | 
|---|
| 875 |     throws Exception { | 
|---|
| 876 |  | 
|---|
| 877 |     super.setOptions(options); | 
|---|
| 878 |     | 
|---|
| 879 |     String temp = Utils.getOption('S', options); | 
|---|
| 880 |     if(temp.length()>0) { | 
|---|
| 881 |       setSeed(Integer.parseInt(temp)); | 
|---|
| 882 |     } | 
|---|
| 883 |     else { | 
|---|
| 884 |       setSeed(1); | 
|---|
| 885 |     } | 
|---|
| 886 |      | 
|---|
| 887 |     setInitialAnchorRandom(Utils.getFlag('R', options)); | 
|---|
| 888 |   } | 
|---|
| 889 |  | 
|---|
| 890 |   /** | 
|---|
| 891 |    * Gets the current settings of this BallTree MiddleOutConstructor. | 
|---|
| 892 |    *  | 
|---|
| 893 |    * @return            an array of strings suitable for passing to setOptions | 
|---|
| 894 |    */ | 
|---|
| 895 |   public String[] getOptions() { | 
|---|
| 896 |     Vector<String>      result; | 
|---|
| 897 |     String[]            options; | 
|---|
| 898 |     int                 i; | 
|---|
| 899 |      | 
|---|
| 900 |     result = new Vector<String>(); | 
|---|
| 901 |      | 
|---|
| 902 |     options = super.getOptions(); | 
|---|
| 903 |     for (i = 0; i < options.length; i++) | 
|---|
| 904 |       result.add(options[i]); | 
|---|
| 905 |      | 
|---|
| 906 |     result.add("-S"); | 
|---|
| 907 |     result.add("" + getSeed()); | 
|---|
| 908 |      | 
|---|
| 909 |     if(isInitialAnchorRandom()) | 
|---|
| 910 |       result.add("-R"); | 
|---|
| 911 |      | 
|---|
| 912 |     return result.toArray(new String[result.size()]); | 
|---|
| 913 |   } | 
|---|
| 914 |    | 
|---|
| 915 |   /** | 
|---|
| 916 |    * Checks whether if the points in an index list | 
|---|
| 917 |    * are in some specified of the master index array.  | 
|---|
| 918 |    * @param list The point list. | 
|---|
| 919 |    * @param startidx The start of the portion in  | 
|---|
| 920 |    * master index array.  | 
|---|
| 921 |    * @param endidx The end of the portion in master | 
|---|
| 922 |    * index array. | 
|---|
| 923 |    * @throws Exception If some point in the point | 
|---|
| 924 |    * list is not in the specified portion of master | 
|---|
| 925 |    * index array.  | 
|---|
| 926 |    */ | 
|---|
| 927 |   public void checkIndicesList(MyIdxList list, int startidx, int endidx)  | 
|---|
| 928 |     throws Exception { | 
|---|
| 929 |      | 
|---|
| 930 |     boolean found; | 
|---|
| 931 |     ListNode node; | 
|---|
| 932 |     for(int i=0; i<list.size(); i++) { | 
|---|
| 933 |       node = (ListNode)list.get(i); | 
|---|
| 934 |       found=false; | 
|---|
| 935 |       for(int j=startidx; j<=endidx; j++) { | 
|---|
| 936 |         if(node.idx==m_InstList[j]) { | 
|---|
| 937 |           found=true;  | 
|---|
| 938 |           break; | 
|---|
| 939 |         } | 
|---|
| 940 |       } | 
|---|
| 941 |       if(!found) | 
|---|
| 942 |         throw new Exception("Error: Element "+node.idx+" of the list not in " + | 
|---|
| 943 |                             "the array." + | 
|---|
| 944 |                             "\nArray: "+printInsts(startidx, endidx)+ | 
|---|
| 945 |                             "\nList: "+printList(list)); | 
|---|
| 946 |     } | 
|---|
| 947 |   } | 
|---|
| 948 |    | 
|---|
| 949 |   /** | 
|---|
| 950 |    * For printing indices in some given portion | 
|---|
| 951 |    * of the master index array.  | 
|---|
| 952 |    * @param startIdx The start of the portion  | 
|---|
| 953 |    * in master index array.  | 
|---|
| 954 |    * @param endIdx The end of the portion in  | 
|---|
| 955 |    * master index array. | 
|---|
| 956 |    * @return The string containing the indices | 
|---|
| 957 |    * in specified portion of the master index  | 
|---|
| 958 |    * array.  | 
|---|
| 959 |    */ | 
|---|
| 960 |   public String printInsts(int startIdx, int endIdx) { | 
|---|
| 961 |     StringBuffer bf = new StringBuffer(); | 
|---|
| 962 |     try { | 
|---|
| 963 |       bf.append("i: "); | 
|---|
| 964 |       for (int i = startIdx; i <= endIdx; i++) { | 
|---|
| 965 |         if (i == startIdx) | 
|---|
| 966 |           bf.append("" + m_InstList[i]); | 
|---|
| 967 |         else | 
|---|
| 968 |           bf.append(", " + m_InstList[i]); | 
|---|
| 969 |       } | 
|---|
| 970 |     } catch (Exception ex) { | 
|---|
| 971 |       ex.printStackTrace(); | 
|---|
| 972 |     } | 
|---|
| 973 |     return bf.toString(); | 
|---|
| 974 |   } | 
|---|
| 975 |    | 
|---|
| 976 |   /** | 
|---|
| 977 |    * For printing indices in a given point list. | 
|---|
| 978 |    * @param points The point list. | 
|---|
| 979 |    * @return String containing indices of the | 
|---|
| 980 |    * points in the point list. | 
|---|
| 981 |    */ | 
|---|
| 982 |   public String printList(MyIdxList points) { | 
|---|
| 983 |     if(points==null || points.length()==0) return ""; | 
|---|
| 984 |     StringBuffer bf = new StringBuffer(); | 
|---|
| 985 |     try { | 
|---|
| 986 |       ListNode temp; | 
|---|
| 987 |       for(int i=0; i<points.size(); i++) { | 
|---|
| 988 |         temp = (ListNode) points.get(i); | 
|---|
| 989 |         if(i==0) | 
|---|
| 990 |           bf.append(""+temp.idx); | 
|---|
| 991 |         else | 
|---|
| 992 |           bf.append(", "+temp.idx); | 
|---|
| 993 |       } | 
|---|
| 994 |     } catch(Exception ex) { ex.printStackTrace(); } | 
|---|
| 995 |     return bf.toString(); | 
|---|
| 996 |   } | 
|---|
| 997 |    | 
|---|
| 998 |   /** | 
|---|
| 999 |    * Returns the revision string. | 
|---|
| 1000 |    *  | 
|---|
| 1001 |    * @return            the revision | 
|---|
| 1002 |    */ | 
|---|
| 1003 |   public String getRevision() { | 
|---|
| 1004 |     return RevisionUtils.extract("$Revision: 5987 $"); | 
|---|
| 1005 |   } | 
|---|
| 1006 |    | 
|---|
| 1007 |   /**  | 
|---|
| 1008 |    * Temp class to represent either a leaf node or an internal node. Should only  | 
|---|
| 1009 |    * have two children (could be the case one child is an instance and the  | 
|---|
| 1010 |    * other another node). Primarily used for anchor nodes. It stores the | 
|---|
| 1011 |    * points contained in a node together with their distances to the  | 
|---|
| 1012 |    * node's centre/anchor point. | 
|---|
| 1013 |    *  | 
|---|
| 1014 |    * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) | 
|---|
| 1015 |    * @version $Revision: 5987 $ | 
|---|
| 1016 |    */ | 
|---|
| 1017 |   protected class TempNode | 
|---|
| 1018 |     implements RevisionHandler { | 
|---|
| 1019 |      | 
|---|
| 1020 |     /** The anchor point of the node. */ | 
|---|
| 1021 |     Instance anchor; | 
|---|
| 1022 |      | 
|---|
| 1023 |     /** The index of the anchor point. */ | 
|---|
| 1024 |     int idx; | 
|---|
| 1025 |      | 
|---|
| 1026 |     /** The radius of the node. */ | 
|---|
| 1027 |     double radius; | 
|---|
| 1028 |      | 
|---|
| 1029 |     /** The list of points inside the node. */ | 
|---|
| 1030 |     MyIdxList points; | 
|---|
| 1031 |      | 
|---|
| 1032 |     /** Node's left child. */ | 
|---|
| 1033 |     TempNode left; | 
|---|
| 1034 |  | 
|---|
| 1035 |     /** Node's right child. */ | 
|---|
| 1036 |     TempNode right; | 
|---|
| 1037 |      | 
|---|
| 1038 |     /** | 
|---|
| 1039 |      * Returns a string represention of the node. | 
|---|
| 1040 |      * @return The string representation of the  | 
|---|
| 1041 |      * node. | 
|---|
| 1042 |      */ | 
|---|
| 1043 |     public String toString() { | 
|---|
| 1044 |       if(points==null || points.length()==0) return idx+""; | 
|---|
| 1045 |       StringBuffer bf = new StringBuffer(); | 
|---|
| 1046 |       try { | 
|---|
| 1047 |         bf.append(idx+" p: "); | 
|---|
| 1048 |         ListNode temp;  | 
|---|
| 1049 |         for(int i=0; i<points.size(); i++) { | 
|---|
| 1050 |           temp = (ListNode) points.get(i); | 
|---|
| 1051 |           if(i==0) | 
|---|
| 1052 |             bf.append(""+temp.idx); | 
|---|
| 1053 |           else | 
|---|
| 1054 |             bf.append(", "+temp.idx); | 
|---|
| 1055 |         } | 
|---|
| 1056 |       } catch(Exception ex) { ex.printStackTrace(); } | 
|---|
| 1057 |       return bf.toString(); | 
|---|
| 1058 |     } | 
|---|
| 1059 |      | 
|---|
| 1060 |     /** | 
|---|
| 1061 |      * Returns the revision string. | 
|---|
| 1062 |      *  | 
|---|
| 1063 |      * @return          the revision | 
|---|
| 1064 |      */ | 
|---|
| 1065 |     public String getRevision() { | 
|---|
| 1066 |       return RevisionUtils.extract("$Revision: 5987 $"); | 
|---|
| 1067 |     } | 
|---|
| 1068 |   } | 
|---|
| 1069 |  | 
|---|
| 1070 |   /** | 
|---|
| 1071 |    * An element of MyIdxList. It stores a points index, and  | 
|---|
| 1072 |    * its distance to some specific point (usually a node's | 
|---|
| 1073 |    * anchor point).  | 
|---|
| 1074 |    *  | 
|---|
| 1075 |    * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) | 
|---|
| 1076 |    * @version $Revision: 5987 $ | 
|---|
| 1077 |    */ | 
|---|
| 1078 |   protected class ListNode | 
|---|
| 1079 |     implements RevisionHandler { | 
|---|
| 1080 |      | 
|---|
| 1081 |     /** The index of the point. */ | 
|---|
| 1082 |     int idx = -1; | 
|---|
| 1083 |      | 
|---|
| 1084 |     /** The distance of the point to the anchor.*/ | 
|---|
| 1085 |     double distance = Double.NEGATIVE_INFINITY; | 
|---|
| 1086 |      | 
|---|
| 1087 |     /** | 
|---|
| 1088 |      * Constructor.  | 
|---|
| 1089 |      * @param i The point's index.  | 
|---|
| 1090 |      * @param d The point's distance to the  | 
|---|
| 1091 |      * anchor. | 
|---|
| 1092 |      */ | 
|---|
| 1093 |     public ListNode(int i, double d) { | 
|---|
| 1094 |       idx = i; | 
|---|
| 1095 |       distance = d; | 
|---|
| 1096 |     } | 
|---|
| 1097 |      | 
|---|
| 1098 |     /** | 
|---|
| 1099 |      * Returns the revision string. | 
|---|
| 1100 |      *  | 
|---|
| 1101 |      * @return          the revision | 
|---|
| 1102 |      */ | 
|---|
| 1103 |     public String getRevision() { | 
|---|
| 1104 |       return RevisionUtils.extract("$Revision: 5987 $"); | 
|---|
| 1105 |     } | 
|---|
| 1106 |   } | 
|---|
| 1107 |    | 
|---|
| 1108 |   /** | 
|---|
| 1109 |    * Class implementing a list. It stores indices of  | 
|---|
| 1110 |    * instances/points, together with their distances to a nodes | 
|---|
| 1111 |    * centre/pivot/anchor, in a (reverse sorted) list.   | 
|---|
| 1112 |    *  | 
|---|
| 1113 |    * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) | 
|---|
| 1114 |    * @version $Revision: 5987 $ | 
|---|
| 1115 |    */ | 
|---|
| 1116 |   protected class MyIdxList implements Serializable, RevisionHandler { | 
|---|
| 1117 |  | 
|---|
| 1118 |     /** for serialization. */ | 
|---|
| 1119 |     private static final long serialVersionUID = -2283869109722934927L;     | 
|---|
| 1120 |      | 
|---|
| 1121 |     /** The array list backing this list */ | 
|---|
| 1122 |     protected ArrayList<ListNode> m_List; | 
|---|
| 1123 |      | 
|---|
| 1124 |     /** | 
|---|
| 1125 |      * Constructor. | 
|---|
| 1126 |      */ | 
|---|
| 1127 |     public MyIdxList() { | 
|---|
| 1128 |       m_List = new ArrayList<ListNode>(); | 
|---|
| 1129 |     } | 
|---|
| 1130 |  | 
|---|
| 1131 |     /** | 
|---|
| 1132 |      * Constructor for given capacity. | 
|---|
| 1133 |      */ | 
|---|
| 1134 |     public MyIdxList(int capacity) { | 
|---|
| 1135 |       m_List = new ArrayList<ListNode>(capacity); | 
|---|
| 1136 |     } | 
|---|
| 1137 |  | 
|---|
| 1138 |     /** | 
|---|
| 1139 |      * Returns the first element in the list. | 
|---|
| 1140 |      * @return The list's first element. | 
|---|
| 1141 |      */ | 
|---|
| 1142 |     public ListNode getFirst() { | 
|---|
| 1143 |       return m_List.get(0); | 
|---|
| 1144 |     } | 
|---|
| 1145 |      | 
|---|
| 1146 |     /** | 
|---|
| 1147 |      * Inserts an element in reverse sorted order in  | 
|---|
| 1148 |      * the list. | 
|---|
| 1149 |      * @param idx The index of the point to insert. | 
|---|
| 1150 |      * @param distance The distance of the point to | 
|---|
| 1151 |      * a node's anchor (this would be used to  | 
|---|
| 1152 |      * determine the sort order). | 
|---|
| 1153 |      */ | 
|---|
| 1154 |     public void insertReverseSorted(final int idx, final double distance) { | 
|---|
| 1155 |  | 
|---|
| 1156 |       int i=0; | 
|---|
| 1157 |       for (ListNode temp : m_List) { | 
|---|
| 1158 |         if(temp.distance < distance) | 
|---|
| 1159 |           break; | 
|---|
| 1160 |         i++; | 
|---|
| 1161 |       } | 
|---|
| 1162 |       m_List.add(i, new ListNode(idx, distance)); | 
|---|
| 1163 |     } | 
|---|
| 1164 |      | 
|---|
| 1165 |     /** | 
|---|
| 1166 |      * Returns an element at the specified index in  | 
|---|
| 1167 |      * the list.  | 
|---|
| 1168 |      * @param index The index of the element in the  | 
|---|
| 1169 |      * list. | 
|---|
| 1170 |      * @return The element at the given index. | 
|---|
| 1171 |      */ | 
|---|
| 1172 |     public ListNode get(int index) { | 
|---|
| 1173 |       return m_List.get(index); | 
|---|
| 1174 |     } | 
|---|
| 1175 |      | 
|---|
| 1176 |     /**  | 
|---|
| 1177 |      * Removes an element at the specified index  | 
|---|
| 1178 |      * from the list. | 
|---|
| 1179 |      * @param index The index of the element | 
|---|
| 1180 |      * in the list to remove. | 
|---|
| 1181 |      */ | 
|---|
| 1182 |     public void remove(int index) { | 
|---|
| 1183 |       m_List.remove(index); | 
|---|
| 1184 |     } | 
|---|
| 1185 |      | 
|---|
| 1186 |     /** | 
|---|
| 1187 |      * Returns the size of the list. | 
|---|
| 1188 |      * @return The size of the list. | 
|---|
| 1189 |      */ | 
|---|
| 1190 |     public int length() { | 
|---|
| 1191 |       return m_List.size(); | 
|---|
| 1192 |     } | 
|---|
| 1193 |      | 
|---|
| 1194 |     /** | 
|---|
| 1195 |      * Returns the size of the list. | 
|---|
| 1196 |      * @return The size of the list. | 
|---|
| 1197 |      */ | 
|---|
| 1198 |     public int size() { | 
|---|
| 1199 |       return m_List.size(); | 
|---|
| 1200 |     } | 
|---|
| 1201 |      | 
|---|
| 1202 |     /** | 
|---|
| 1203 |      * Appends one list at the end of the other.  | 
|---|
| 1204 |      * @param list1 The list to which the other | 
|---|
| 1205 |      * list would be appended. | 
|---|
| 1206 |      * @param list2 The list to append to the  | 
|---|
| 1207 |      * other list. | 
|---|
| 1208 |      * @return The new list with list2 appended  | 
|---|
| 1209 |      * to list1. | 
|---|
| 1210 |      */ | 
|---|
| 1211 |     public MyIdxList append(MyIdxList list1, MyIdxList list2) { | 
|---|
| 1212 |       MyIdxList temp = new MyIdxList(list1.size()+list2.size()); | 
|---|
| 1213 |       temp.m_List.addAll(list1.m_List); | 
|---|
| 1214 |       temp.m_List.addAll(list2.m_List); | 
|---|
| 1215 |       return temp; | 
|---|
| 1216 |     } | 
|---|
| 1217 |      | 
|---|
| 1218 |     /** | 
|---|
| 1219 |      * Checks the sorting of a list. | 
|---|
| 1220 |      * @param list The list whose sorting is | 
|---|
| 1221 |      * to be checked. | 
|---|
| 1222 |      * @throws Exception If the list is not | 
|---|
| 1223 |      * in (reverse) sorted order. | 
|---|
| 1224 |      */ | 
|---|
| 1225 |     public void checkSorting(MyIdxList list) throws Exception { | 
|---|
| 1226 |       Iterator<ListNode> en = m_List.iterator(); | 
|---|
| 1227 |       ListNode first=null, second=null; | 
|---|
| 1228 |       while(en.hasNext()) { | 
|---|
| 1229 |         if(first==null) | 
|---|
| 1230 |           first = (ListNode) en.next(); | 
|---|
| 1231 |         else { | 
|---|
| 1232 |           second = (ListNode)en.next(); | 
|---|
| 1233 |           if(first.distance < second.distance) | 
|---|
| 1234 |             throw new Exception("List not sorted correctly." + | 
|---|
| 1235 |                                 " first.distance: " + first.distance + | 
|---|
| 1236 |                                 " second.distance: " + second.distance + | 
|---|
| 1237 |                                 " Please check code.");             | 
|---|
| 1238 |         } | 
|---|
| 1239 |       } | 
|---|
| 1240 |     } | 
|---|
| 1241 |      | 
|---|
| 1242 |     /** | 
|---|
| 1243 |      * Returns the revision string. | 
|---|
| 1244 |      *  | 
|---|
| 1245 |      * @return          the revision | 
|---|
| 1246 |      */ | 
|---|
| 1247 |     public String getRevision() { | 
|---|
| 1248 |       return RevisionUtils.extract("$Revision: 5987 $"); | 
|---|
| 1249 |     } | 
|---|
| 1250 |   } | 
|---|
| 1251 | } | 
|---|