source: tags/MetisMQIDemo/src/main/java/weka/core/neighboursearch/balltrees/MiddleOutConstructor.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 39.7 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * MiddleOutConstructor.java
19 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.core.neighboursearch.balltrees;
23
24import weka.core.Instance;
25import weka.core.DenseInstance;
26import weka.core.Instances;
27import weka.core.Option;
28import weka.core.Randomizable;
29import weka.core.RevisionHandler;
30import weka.core.RevisionUtils;
31import weka.core.TechnicalInformation;
32import weka.core.TechnicalInformationHandler;
33import weka.core.Utils;
34import weka.core.TechnicalInformation.Field;
35import weka.core.TechnicalInformation.Type;
36
37import java.util.Enumeration;
38import java.util.Random;
39import java.util.Vector;
40import java.util.ArrayList;
41import java.util.Iterator;
42
43import java.io.Serializable;
44
45/**
46 <!-- globalinfo-start -->
47 * The class that builds a BallTree middle out.<br/>
48 * <br/>
49 * For more information see also:<br/>
50 * <br/>
51 * Andrew W. Moore: The Anchors Hierarchy: Using the Triangle Inequality to Survive High Dimensional Data. In: UAI '00: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence, San Francisco, CA, USA, 397-405, 2000.<br/>
52 * <br/>
53 * Ashraf Masood Kibriya (2007). Fast Algorithms for Nearest Neighbour Search. Hamilton, New Zealand.
54 * <p/>
55 <!-- globalinfo-end -->
56 *
57 <!-- technical-bibtex-start -->
58 * BibTeX:
59 * <pre>
60 * &#64;inproceedings{Moore2000,
61 *    address = {San Francisco, CA, USA},
62 *    author = {Andrew W. Moore},
63 *    booktitle = {UAI '00: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence},
64 *    pages = {397-405},
65 *    publisher = {Morgan Kaufmann Publishers Inc.},
66 *    title = {The Anchors Hierarchy: Using the Triangle Inequality to Survive High Dimensional Data},
67 *    year = {2000}
68 * }
69 *
70 * &#64;mastersthesis{Kibriya2007,
71 *    address = {Hamilton, New Zealand},
72 *    author = {Ashraf Masood Kibriya},
73 *    school = {Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato},
74 *    title = {Fast Algorithms for Nearest Neighbour Search},
75 *    year = {2007}
76 * }
77 * </pre>
78 * <p/>
79 <!-- technical-bibtex-end -->
80 *
81 <!-- options-start -->
82 * Valid options are: <p/>
83 *
84 * <pre> -S &lt;num&gt;
85 *  The seed for the random number generator used
86 *  in selecting random anchor.
87 * (default: 1)</pre>
88 *
89 * <pre> -R
90 *  Use randomly chosen initial anchors.</pre>
91 *
92 <!-- options-end -->
93 *
94 * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
95 * @version $Revision: 5987 $
96 */
97public class MiddleOutConstructor
98  extends BallTreeConstructor
99  implements Randomizable, TechnicalInformationHandler {
100
101  /** for serialization. */
102  private static final long serialVersionUID = -8523314263062524462L;
103
104  /** Seed form random number generator. */
105  protected int m_RSeed = 1;
106       
107  /**
108   * The random number generator for selecting
109   * the first anchor point randomly
110   * (if selecting randomly).
111   */
112  protected Random rand = new Random(m_RSeed);
113 
114  /**
115   * The radius of the smallest ball enclosing all the data points.
116   */
117  private double rootRadius = -1;
118 
119  /**
120   * True if the initial anchor is chosen randomly. False if it is the furthest
121   * point from the mean/centroid.
122   */
123  protected boolean m_RandomInitialAnchor = true;
124
125  /**
126   * Creates a new instance of MiddleOutConstructor.
127   */
128  public MiddleOutConstructor() {
129  }
130
131  /**
132   * Returns a string describing this nearest neighbour search algorithm.
133   *
134   * @return            a description of the algorithm for displaying in the
135   *                    explorer/experimenter gui
136   */
137  public String globalInfo() {
138    return 
139        "The class that builds a BallTree middle out.\n\n"
140      + "For more information see also:\n\n"
141      + getTechnicalInformation().toString();
142  }
143   
144  /**
145   * Returns an instance of a TechnicalInformation object, containing detailed
146   * information about the technical background of this class, e.g., paper
147   * reference or book this class is based on.
148   *
149   * @return            the technical information about this class
150   */
151  public TechnicalInformation getTechnicalInformation() {
152    TechnicalInformation result;
153    TechnicalInformation additional;
154
155    result = new TechnicalInformation(Type.INPROCEEDINGS);
156    result.setValue(Field.AUTHOR, "Andrew W. Moore");
157    result.setValue(Field.TITLE, "The Anchors Hierarchy: Using the Triangle Inequality to Survive High Dimensional Data");
158    result.setValue(Field.YEAR, "2000");
159    result.setValue(Field.BOOKTITLE, "UAI '00: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence");
160    result.setValue(Field.PAGES, "397-405");
161    result.setValue(Field.PUBLISHER, "Morgan Kaufmann Publishers Inc.");
162    result.setValue(Field.ADDRESS, "San Francisco, CA, USA");
163
164    additional = result.add(Type.MASTERSTHESIS);
165    additional.setValue(Field.AUTHOR, "Ashraf Masood Kibriya");
166    additional.setValue(Field.TITLE, "Fast Algorithms for Nearest Neighbour Search");
167    additional.setValue(Field.YEAR, "2007");
168    additional.setValue(Field.SCHOOL, "Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato");
169    additional.setValue(Field.ADDRESS, "Hamilton, New Zealand");
170   
171    return result;
172  }
173
174  /**
175   * Builds a ball tree middle out.
176   * @return The root node of the tree.
177   * @throws Exception If there is problem building
178   * the tree.
179   */
180  public BallNode buildTree() throws Exception {
181    m_NumNodes = m_MaxDepth = m_NumLeaves = 0;
182    if(rootRadius == -1) {
183      rootRadius = BallNode.calcRadius(m_InstList, m_Instances, 
184                                BallNode.calcCentroidPivot(m_InstList, m_Instances),
185                                m_DistanceFunction);
186    }
187    BallNode root = buildTreeMiddleOut(0, m_Instances.numInstances()-1);
188    return root;
189  }
190
191  /**
192   * Builds a ball tree middle out from the
193   * portion of the master index array given
194   * by supplied start and end index.
195   * @param startIdx The start of the portion
196   * in master index array.
197   * @param endIdx the end of the portion in
198   * master index array.
199   * @return The root node of the built tree.
200   * @throws Exception If there is some
201   * problem building the tree.
202   */
203  protected BallNode buildTreeMiddleOut(int startIdx, int endIdx) 
204    throws Exception {
205       
206    Instance pivot;
207    double radius;
208    Vector<TempNode> anchors;
209    int numInsts = endIdx - startIdx + 1;
210    int numAnchors = (int) Math.round(Math.sqrt(numInsts));
211   
212    //create anchor's hierarchy
213    if (numAnchors > 1) {
214      pivot = BallNode.calcCentroidPivot(startIdx, endIdx, m_InstList,m_Instances);
215      radius = BallNode.calcRadius(startIdx, endIdx, m_InstList, m_Instances, 
216                                                           pivot, m_DistanceFunction);     
217      if(numInsts <= m_MaxInstancesInLeaf || 
218         (rootRadius==0 ? true : radius/rootRadius < m_MaxRelLeafRadius)) { //just make a leaf don't make anchors hierarchy
219                BallNode node = new BallNode(startIdx, endIdx, m_NumNodes,pivot, radius);
220                return node;
221          }
222      anchors = new Vector<TempNode>(numAnchors);
223      createAnchorsHierarchy(anchors, numAnchors, startIdx, endIdx);
224
225      BallNode node = mergeNodes(anchors, startIdx, endIdx);
226     
227      buildLeavesMiddleOut(node);
228     
229      return node;
230    }// end anchors hierarchy
231    else {
232      BallNode node = new BallNode(startIdx, endIdx, m_NumNodes, 
233                      (pivot=BallNode.calcCentroidPivot(startIdx, endIdx, 
234                                                      m_InstList, m_Instances)), 
235                      BallNode.calcRadius(startIdx, endIdx, m_InstList, 
236                                          m_Instances, pivot, 
237                                          m_DistanceFunction)
238                         );
239      return node;
240    }       
241  }
242 
243  /**
244   * Creates an anchors hierarchy from a portion
245   * of master index array.
246   *
247   * @param anchors The vector for putting the anchors
248   * into.
249   * @param numAnchors The number of anchors to create.
250   * @param startIdx The start of the portion of master
251   * index array.
252   * @param endIdx The end of the portion of master
253   * index array.
254   * @throws Exception If there is some problem in creating
255   * the hierarchy.
256   */
257  protected void createAnchorsHierarchy(Vector<TempNode> anchors, final int numAnchors, 
258      final int startIdx, final int endIdx) 
259    throws Exception {
260   
261    TempNode anchr1 = m_RandomInitialAnchor ? 
262            getRandomAnchor(startIdx, endIdx) : 
263            getFurthestFromMeanAnchor(startIdx, endIdx);
264                     
265    TempNode amax = anchr1; //double maxradius = anchr1.radius;
266    TempNode newAnchor;
267    Vector<double[]> anchorDistances = new Vector<double[]>(numAnchors-1);
268    anchors.add(anchr1);
269
270    //creating anchors
271    while(anchors.size() < numAnchors) {
272      //create new anchor
273      newAnchor = new TempNode();
274      newAnchor.points = new MyIdxList();       
275      Instance newpivot = m_Instances.instance(((ListNode)amax.points.getFirst()).idx);
276      newAnchor.anchor = newpivot;
277      newAnchor.idx = ((ListNode)amax.points.getFirst()).idx;
278
279      setInterAnchorDistances(anchors, newAnchor, anchorDistances);
280      if(stealPoints(newAnchor, anchors, anchorDistances)) //if points stolen
281        newAnchor.radius = ((ListNode)newAnchor.points.getFirst()).distance;
282      else
283        newAnchor.radius = 0.0;
284      anchors.add(newAnchor);
285
286      //find new amax       
287      amax = (TempNode)anchors.elementAt(0);
288      for(int i=1; i<anchors.size(); i++) {
289        newAnchor = (TempNode)anchors.elementAt(i);
290        if(newAnchor.radius > amax.radius)
291          amax = newAnchor;
292      }//end for
293    }//end while
294  }
295 
296  /**
297   * Applies the middle out build procedure to
298   * the leaves of the tree. The leaf nodes
299   * should be the ones that were created by
300   * createAnchorsHierarchy(). The process
301   * continues recursively for the leaves
302   * created for each leaf of the given tree
303   * until for some leaf node <=
304   * m_MaxInstancesInLeaf instances remain
305   * in the leaf.
306   *
307   * @param node The root of the tree.
308   * @throws Exception If there is some problem
309   * in building the tree leaves.
310   */
311  protected void buildLeavesMiddleOut(BallNode node) throws Exception {
312    if(node.m_Left!=null && node.m_Right!=null) { //if an internal node
313      buildLeavesMiddleOut(node.m_Left);
314      buildLeavesMiddleOut(node.m_Right);
315    }
316    else if(node.m_Left!=null || node.m_Right!=null) {
317      throw new Exception("Invalid leaf assignment. Please check code");
318    }
319    else { //if node is a leaf
320      BallNode n2 = buildTreeMiddleOut(node.m_Start, node.m_End);
321      if(n2.m_Left!=null && n2.m_Right!=null) {
322        node.m_Left = n2.m_Left;
323        node.m_Right = n2.m_Right;
324        buildLeavesMiddleOut(node); 
325        //the stopping condition in buildTreeMiddleOut will stop the recursion,
326        //where it won't split a node at all, and we won't recurse here.
327      }
328      else if(n2.m_Left!=null || n2.m_Right!=null)
329        throw new Exception("Invalid leaf assignment. Please check code");
330    }
331  }
332
333  /**
334   * Merges nodes created by createAnchorsHierarchy()
335   * into one top node.
336   *
337   * @param list List of anchor nodes.
338   * @param startIdx The start of the portion of
339   * master index array containing these anchor
340   * nodes.
341   * @param endIdx The end of the portion of master
342   * index array containing these anchor nodes.
343   * @return The top/root node after merging
344   * the given anchor nodes.
345   * @throws Exception IF there is some problem in
346   * merging.
347   */
348  protected BallNode mergeNodes(Vector<TempNode> list, int startIdx, int endIdx)
349    throws Exception {
350   
351    for(int i=0; i<list.size(); i++) {
352      TempNode n = (TempNode) list.get(i);
353      n.anchor = calcPivot(n.points, new MyIdxList(), m_Instances);
354      n.radius = calcRadius(n.points, new MyIdxList(), n.anchor, m_Instances);
355    }
356    double minRadius, tmpRadius; //tmpVolume, minVolume;
357    Instance pivot, minPivot=null;
358    TempNode parent; int min1=-1, min2=-1;   
359   
360    while(list.size() > 1) { //main merging loop
361      minRadius=Double.POSITIVE_INFINITY;     
362     
363      for(int i=0; i<list.size(); i++) {
364        TempNode first = (TempNode) list.get(i);
365        for(int j=i+1; j<list.size(); j++) {
366          TempNode second = (TempNode) list.get(j);
367          pivot = calcPivot(first, second, m_Instances); 
368          tmpRadius = calcRadius(first, second); //calcRadius(first.points, second.points, pivot, m_Instances);   
369          if(tmpRadius < minRadius) { //(tmpVolume < minVolume) {
370            minRadius = tmpRadius; //minVolume = tmpVolume;
371            minPivot = pivot; 
372            min1=i; min2=j; 
373            //minInstList = tmpInstList;
374          }
375        }//end for(j)
376      }//end for(i)
377      parent = new TempNode();
378      parent.left  = (TempNode) list.get(min1);
379      parent.right = (TempNode) list.get(min2);
380      parent.anchor = minPivot;
381      parent.radius = calcRadius(parent.left.points, parent.right.points, minPivot, m_Instances); //minRadius;
382      parent.points = parent.left.points.append(parent.left.points, parent.right.points);
383      list.remove(min1); list.remove(min2-1);
384      list.add(parent);
385    }//end while
386    TempNode tmpRoot = (TempNode)list.get(list.size()-1);
387   
388    if((endIdx-startIdx+1)!= tmpRoot.points.length()) {
389      throw new Exception("Root nodes instance list is of irregular length. " +
390                          "Please check code. Length should " +
391                          "be: " + (endIdx-startIdx+1) + 
392                          " whereas it is found to be: "+tmpRoot.points.length());
393    }
394    for(int i=0; i<tmpRoot.points.length(); i++) {
395      m_InstList[startIdx+i] = ((ListNode)tmpRoot.points.get(i)).idx;
396    }
397   
398    BallNode node = makeBallTreeNodes(tmpRoot, startIdx, endIdx, 0);
399   
400    return node;   
401  }
402 
403  /**
404   * Makes BallTreeNodes out of TempNodes.
405   * 
406   * @param node The root TempNode
407   * @param startidx The start of the portion of
408   * master index array the TempNodes
409   * are made from.
410   * @param endidx The end of the portion of
411   * master index array the TempNodes are
412   * made from.
413   * @param depth The depth in the tree where
414   * this root TempNode is made (needed when
415   * leaves of a tree deeper down are built
416   * middle out).
417   * @return The root BallTreeNode.
418   */
419  protected BallNode makeBallTreeNodes(TempNode node, int startidx, 
420      int endidx, int depth) {
421    BallNode ball=null;
422   
423    if(node.left!=null && node.right!=null) { //make an internal node
424      ball = new BallNode(
425      startidx, endidx, m_NumNodes, 
426      node.anchor,
427      node.radius
428      );
429      m_NumNodes += 1;
430      ball.m_Left = makeBallTreeNodes(node.left, startidx, startidx+node.left.points.length()-1, depth+1);
431      ball.m_Right= makeBallTreeNodes(node.right, startidx+node.left.points.length(), endidx, depth+1);
432      m_MaxDepth++;
433    }
434    else { //make a leaf node
435      ball = new BallNode(startidx, endidx, m_NumNodes,       
436      node.anchor, 
437      node.radius 
438                         );
439      m_NumNodes += 1;     
440      m_NumLeaves += 1;
441    }
442    return ball;
443  }
444   
445  /**
446   * Returns an anchor point which is furthest from the
447   * mean point for a given set of points (instances)
448   * (The anchor instance is chosen from the given
449   * set of points).
450   *
451   * @param startIdx The start index of the points
452   * for which anchor point is required.
453   * @param endIdx The end index of the points for
454   * which anchor point is required.
455   * @return The furthest point/instance from the mean
456   * of given set of points.
457   */
458  protected TempNode getFurthestFromMeanAnchor(int startIdx, int endIdx) {
459    TempNode anchor = new TempNode();
460    Instance centroid = BallNode.calcCentroidPivot(startIdx, endIdx, m_InstList, 
461                                                   m_Instances);
462    Instance temp;
463    double tmpr;
464    anchor.radius = Double.NEGATIVE_INFINITY;
465    for(int i=startIdx; i<=endIdx; i++) {
466      temp = m_Instances.instance(m_InstList[i]);
467      tmpr = m_DistanceFunction.distance(centroid, temp);
468      if(tmpr > anchor.radius) {
469        anchor.idx = m_InstList[i];
470        anchor.anchor = temp;
471        anchor.radius = tmpr;
472      }
473    }
474   
475    setPoints(anchor, startIdx, endIdx, m_InstList);
476    return anchor;
477  }
478 
479  /**
480   * Returns a random anchor point/instance from a
481   * given set of points/instances.
482   *
483   * @param startIdx The start index of the points
484   * for which anchor is required.
485   * @param endIdx The end index of the points for
486   * which anchor is required.
487   * @return The random anchor point/instance
488   * for the given set of
489   */
490  protected TempNode getRandomAnchor(int startIdx, int endIdx) {
491    TempNode anchr1 = new TempNode();
492    anchr1.idx = m_InstList[startIdx+rand.nextInt((endIdx-startIdx+1))];
493    anchr1.anchor = m_Instances.instance(anchr1.idx);
494    setPoints(anchr1, startIdx, endIdx, m_InstList);
495    anchr1.radius = ((ListNode)anchr1.points.getFirst()).distance;
496   
497    return anchr1;
498  }
499 
500  /**
501   * Sets the points of an anchor node. It takes the
502   * indices of points from the given portion of
503   * an index array and stores those indices, together
504   * with their distances to the given anchor node,
505   * in the point index list of the anchor node.
506   * 
507   * @param node The node in which the points are
508   * needed to be set.
509   * @param startIdx The start of the portion in
510   * the given index array (the master index
511   * array).
512   * @param endIdx The end of the portion in the
513   * given index array.
514   * @param indices The index array.
515   */
516  public void setPoints(TempNode node, int startIdx, int endIdx, int[] indices) {
517    node.points = new MyIdxList();   
518    Instance temp; double dist;
519    for(int i=startIdx; i<=endIdx; i++) {
520      temp = m_Instances.instance(indices[i]);
521      dist = m_DistanceFunction.distance(node.anchor, temp);
522      node.points.insertReverseSorted(indices[i], dist);
523    }
524  }
525
526  /**
527   * Sets the distances of a supplied new
528   * anchor to all the rest of the
529   * previous anchor points.
530   * @param anchors The old anchor points.
531   * @param newAnchor The new anchor point.
532   * @param anchorDistances The vector to
533   * store the distances of newAnchor to
534   * each of the old anchors.
535   * @throws Exception If there is some
536   * problem in calculating the distances.
537   */
538  public void setInterAnchorDistances(Vector<TempNode> anchors, TempNode newAnchor,
539                                      Vector<double[]> anchorDistances) throws Exception {
540    double[] distArray = new double[anchors.size()];
541   
542    for(int i=0; i<anchors.size(); i++) {
543      Instance anchr = ((TempNode)anchors.elementAt(i)).anchor;
544      distArray[i] = m_DistanceFunction.distance(anchr, newAnchor.anchor);
545    }
546    anchorDistances.add(distArray);
547  }
548 
549  /**
550   * Removes points from old anchors that
551   * are nearer to the given new anchor and
552   * adds them to the list of points of the
553   * new anchor.
554   * @param newAnchor The new anchor.
555   * @param anchors The old anchors.
556   * @param anchorDistances The distances
557   * of new anchor to each of the old
558   * anchors.
559   * @return true if any points are removed
560   * from the old anchors
561   */
562  public boolean stealPoints(TempNode newAnchor, Vector anchors, 
563                          Vector anchorDistances) {
564                           
565    int maxIdx = -1; 
566    double maxDist = Double.NEGATIVE_INFINITY;
567    double[] distArray = (double[])anchorDistances.lastElement();
568   
569    for(int i=0; i<distArray.length; i++)
570      if(maxDist < distArray[i]) {
571        maxDist = distArray[i]; maxIdx = i;
572      }
573   
574    boolean anyPointsStolen=false, pointsStolen=false;
575    TempNode anchorI;
576    double newDist, distI, interAnchMidDist;
577    Instance newAnchInst = newAnchor.anchor, anchIInst;
578    for(int i=0; i<anchors.size(); i++) {
579      anchorI = (TempNode)anchors.elementAt(i);
580      anchIInst = anchorI.anchor;
581     
582      pointsStolen = false;
583      interAnchMidDist = m_DistanceFunction.distance(newAnchInst, anchIInst)/2D;
584      for(int j=0; j<anchorI.points.length(); j++) {
585        ListNode tmp = (ListNode) anchorI.points.get(j);
586        //break if we reach a point whose distance is less than the midpoint
587        //of inter anchor distance
588        if(tmp.distance < interAnchMidDist)
589          break;
590        //else test if this point can be stolen by the new anchor
591        newDist = m_DistanceFunction.distance(newAnchInst, 
592                                              m_Instances.instance(tmp.idx));
593        distI = tmp.distance;
594        if(newDist < distI) {
595          newAnchor.points.insertReverseSorted(tmp.idx, newDist);
596          anchorI.points.remove(j);
597          anyPointsStolen=pointsStolen=true;
598        }
599      }
600      if (pointsStolen)
601        anchorI.radius = ((ListNode)anchorI.points.getFirst()).distance;
602    }//end for
603    return anyPointsStolen;
604  }//end stealPoints()
605
606  /**
607  /**
608   * Calculates the centroid pivot of a node based on its
609   * two child nodes (if merging two nodes).
610   * @param node1 The first child.
611   * @param node2 The second child.
612   * @param insts The set of instances on which the tree
613   * is being built (as dataset header information is
614   * required).
615   * @return The centroid pivot of a node.
616   */
617  public Instance calcPivot(TempNode node1, TempNode node2, Instances insts) {
618    int classIdx = m_Instances.classIndex();
619    double[] attrVals = new double[insts.numAttributes()];
620    Instance temp;
621    double anchr1Ratio = (double)node1.points.length() / 
622                         (node1.points.length()+node2.points.length()),
623           anchr2Ratio = (double)node2.points.length() / 
624                         (node1.points.length()+node2.points.length());                         ;
625    for(int k=0; k<node1.anchor.numValues(); k++) {
626      if(node1.anchor.index(k)==classIdx)
627        continue;
628      attrVals[k] += node1.anchor.valueSparse(k)*anchr1Ratio;
629    }
630    for(int k=0; k<node2.anchor.numValues(); k++) {
631      if(node2.anchor.index(k)==classIdx)
632        continue;
633      attrVals[k] += node2.anchor.valueSparse(k)*anchr2Ratio;
634    }
635    temp = new DenseInstance(1.0, attrVals);
636    return temp;
637  }
638 
639  /**
640   * Calculates the centroid pivot of a node based on
641   * the list of points that it  contains (tbe two
642   * lists of its children are provided).
643   * @param list1 The point index list of first child.
644   * @param list2 The point index list of second
645   * child.
646   * @param insts The insts object on which the tree
647   * is being built (for header information).
648   * @return The centroid pivot of the node.
649   */
650  public Instance calcPivot(MyIdxList list1, MyIdxList list2, Instances insts) {
651    int classIdx = m_Instances.classIndex();
652    double[] attrVals = new double[insts.numAttributes()];
653   
654    Instance temp;
655    for(int i=0; i<list1.length(); i++) {
656      temp = insts.instance(((ListNode)list1.get(i)).idx);
657      for(int k=0; k<temp.numValues(); k++) {
658        if(temp.index(k)==classIdx)
659          continue;
660        attrVals[k] += temp.valueSparse(k);
661      }
662    }
663    for(int j=0; j<list2.length(); j++) {
664      temp = insts.instance(((ListNode)list2.get(j)).idx);
665      for(int k=0; k<temp.numValues(); k++) {
666        if(temp.index(k)==classIdx)
667          continue;
668        attrVals[k] += temp.valueSparse(k);
669      }
670    }
671    for(int j=0, numInsts=list1.length()+list2.length(); 
672        j < attrVals.length; j++) {
673      attrVals[j] /= numInsts;
674    }
675    temp = new DenseInstance(1.0, attrVals);
676    return temp;
677  }
678 
679  /**
680   * Calculates the radius of a node based on its two
681   * child nodes (if merging two nodes).
682   * @param n1 The first child of the node.
683   * @param n2 The second child of the node.
684   * @return The radius of the node.
685   * @throws Exception
686   */
687  public double calcRadius(TempNode n1, TempNode n2) {
688          Instance p1 = n1.anchor, p2 = n2.anchor;
689          double radius = n1.radius + m_DistanceFunction.distance(p1, p2) + n2.radius;
690          return radius/2;
691  }
692 
693  /**
694   * Calculates the radius of a node based on the
695   * list of points that it contains (the two lists of
696   * its children are provided).
697   * @param list1 The point index list of first child.
698   * @param list2 The point index list of second child.
699   * @param pivot The centre/pivot of the node.
700   * @param insts The instances on which the tree is
701   * being built (for header info).
702   * @return The radius of the node.
703   */
704  public double calcRadius(MyIdxList list1, MyIdxList list2, 
705                           Instance pivot, Instances insts) {
706    double radius = Double.NEGATIVE_INFINITY;
707   
708    for(int i=0; i<list1.length(); i++) {
709      double dist = m_DistanceFunction.distance(pivot, 
710                                              insts.instance(((ListNode)list1.get(i)).idx));
711      if(dist>radius)
712        radius = dist;
713    }
714    for(int j=0; j<list2.length(); j++) {
715      double dist = m_DistanceFunction.distance(pivot, 
716                                              insts.instance(((ListNode)list2.get(j)).idx));
717      if(dist>radius)
718        radius = dist;
719    }
720    return radius;
721  }
722 
723  /**
724   * Adds an instance to the tree. This implementation of
725   * MiddleOutConstructor doesn't support addition of
726   * instances to already built tree, hence it always
727   * throws an exception.
728   * @param node The root of the tree to which the
729   * instance is to be added.
730   * @param inst The instance to add to the tree.
731   * @return The updated master index array after
732   * adding the instance.
733   * @throws Exception Always as this implementation of
734   * MiddleOutConstructor doesn't support addition of
735   * instances after batch construction of the tree.
736   */
737  public int[] addInstance(BallNode node, Instance inst) throws Exception {
738    throw new Exception("Addition of instances after the tree is built, not " +
739                        "possible with MiddleOutConstructor.");
740  }
741
742  /**
743   * Sets the maximum number of instances allowed in a leaf.
744   * @param num The maximum number of instances allowed in
745   * a leaf.
746   * @throws Exception If the num is < 2, as the method
747   * cannot work for < 2 instances.
748   */ 
749  public void setMaxInstancesInLeaf(int num) throws Exception {
750    if(num<2)
751      throw new Exception("The maximum number of instances in a leaf for " +
752                          "using MiddleOutConstructor must be >=2.");
753    super.setMaxInstancesInLeaf(num);
754  } 
755
756  /**
757   * Sets the instances on which the tree is to be built.
758   * @param insts The instances on which to build the
759   * ball tree.
760   */
761  public void setInstances(Instances insts) {
762    super.setInstances(insts);
763    rootRadius = -1; //this needs to be re-calculated by buildTree()
764  }
765 
766  /**
767   * Sets the master index array that points to
768   * instances in m_Instances, so that only this array
769   * is manipulated, and m_Instances is left
770   * untouched.
771   * @param instList The master index array.
772   */
773  public void setInstanceList(int[] instList) {
774    super.setInstanceList(instList); 
775    rootRadius = -1; //this needs to be re-calculated by buildTree()
776  }
777 
778  /**
779   * Returns the tip text for this property.
780   *
781   * @return            tip text for this property suitable for
782   *                    displaying in the explorer/experimenter gui
783   */
784  public String initialAnchorRandomTipText() {
785    return "Whether the initial anchor is chosen randomly.";
786  }
787 
788  /**
789   * Gets whether if the initial anchor is chosen randomly.
790   * @return true if the initial anchor is a random one.
791   */
792  public boolean isInitialAnchorRandom() {
793    return m_RandomInitialAnchor;
794  }
795 
796  /**
797   * Sets whether if the initial anchor is chosen randomly. If not
798   * then if it is the furthest point from the mean/centroid.
799   * @param randomInitialAnchor Should be true if the first
800   * anchor is to be chosen randomly.
801   */
802  public void setInitialAnchorRandom(boolean randomInitialAnchor) {
803    m_RandomInitialAnchor = randomInitialAnchor;
804  }
805 
806  /**
807   * Returns the tip text for this property.
808   *
809   * @return tip text for this property suitable for
810   * displaying in the explorer/experimenter gui
811   */
812  public String seedTipText() {
813    return "The seed value for the random number generator.";
814  }
815
816  /**
817   * Returns the seed for random number generator.
818   * @return The random number seed.
819   */
820  public int getSeed() {
821    return m_RSeed;
822  }
823 
824  /**
825   * Sets the seed for random number generator
826   * (that is used for selecting the first anchor
827   * point randomly).
828   * @param seed The seed.
829   */
830  public void setSeed(int seed) {
831    m_RSeed = seed;
832  }
833 
834  /**
835   * Returns an enumeration describing the available options.
836   *
837   * @return            an enumeration of all the available options.
838   */
839  public Enumeration listOptions() {
840    Vector<Option> newVector = new Vector<Option>();
841   
842    newVector.addElement(new Option(
843        "\tThe seed for the random number generator used\n"
844        + "\tin selecting random anchor.\n"
845        + "(default: 1)", 
846        "S", 1, "-S <num>"));
847   
848    newVector.addElement(new Option(
849        "\tUse randomly chosen initial anchors.",
850        "R", 0, "-R"));
851   
852    return newVector.elements();
853  }
854
855  /**
856   * Parses a given list of options.
857   *
858   <!-- options-start -->
859   * Valid options are: <p/>
860   *
861   * <pre> -S &lt;num&gt;
862   *  The seed for the random number generator used
863   *  in selecting random anchor.
864   * (default: 1)</pre>
865   *
866   * <pre> -R
867   *  Use randomly chosen initial anchors.</pre>
868   *
869   <!-- options-end -->
870   *
871   * @param options     the list of options as an array of strings
872   * @throws Exception  if an option is not supported
873   **/
874  public void setOptions(String[] options)
875    throws Exception {
876
877    super.setOptions(options);
878   
879    String temp = Utils.getOption('S', options);
880    if(temp.length()>0) {
881      setSeed(Integer.parseInt(temp));
882    }
883    else {
884      setSeed(1);
885    }
886   
887    setInitialAnchorRandom(Utils.getFlag('R', options));
888  }
889
890  /**
891   * Gets the current settings of this BallTree MiddleOutConstructor.
892   *
893   * @return            an array of strings suitable for passing to setOptions
894   */
895  public String[] getOptions() {
896    Vector<String>      result;
897    String[]            options;
898    int                 i;
899   
900    result = new Vector<String>();
901   
902    options = super.getOptions();
903    for (i = 0; i < options.length; i++)
904      result.add(options[i]);
905   
906    result.add("-S");
907    result.add("" + getSeed());
908   
909    if(isInitialAnchorRandom())
910      result.add("-R");
911   
912    return result.toArray(new String[result.size()]);
913  }
914 
915  /**
916   * Checks whether if the points in an index list
917   * are in some specified of the master index array.
918   * @param list The point list.
919   * @param startidx The start of the portion in
920   * master index array.
921   * @param endidx The end of the portion in master
922   * index array.
923   * @throws Exception If some point in the point
924   * list is not in the specified portion of master
925   * index array.
926   */
927  public void checkIndicesList(MyIdxList list, int startidx, int endidx) 
928    throws Exception {
929   
930    boolean found;
931    ListNode node;
932    for(int i=0; i<list.size(); i++) {
933      node = (ListNode)list.get(i);
934      found=false;
935      for(int j=startidx; j<=endidx; j++) {
936        if(node.idx==m_InstList[j]) {
937          found=true; 
938          break;
939        }
940      }
941      if(!found)
942        throw new Exception("Error: Element "+node.idx+" of the list not in " +
943                            "the array." +
944                            "\nArray: "+printInsts(startidx, endidx)+
945                            "\nList: "+printList(list));
946    }
947  }
948 
949  /**
950   * For printing indices in some given portion
951   * of the master index array.
952   * @param startIdx The start of the portion
953   * in master index array.
954   * @param endIdx The end of the portion in
955   * master index array.
956   * @return The string containing the indices
957   * in specified portion of the master index
958   * array.
959   */
960  public String printInsts(int startIdx, int endIdx) {
961    StringBuffer bf = new StringBuffer();
962    try {
963      bf.append("i: ");
964      for (int i = startIdx; i <= endIdx; i++) {
965        if (i == startIdx)
966          bf.append("" + m_InstList[i]);
967        else
968          bf.append(", " + m_InstList[i]);
969      }
970    } catch (Exception ex) {
971      ex.printStackTrace();
972    }
973    return bf.toString();
974  }
975 
976  /**
977   * For printing indices in a given point list.
978   * @param points The point list.
979   * @return String containing indices of the
980   * points in the point list.
981   */
982  public String printList(MyIdxList points) {
983    if(points==null || points.length()==0) return "";
984    StringBuffer bf = new StringBuffer();
985    try {
986      ListNode temp;
987      for(int i=0; i<points.size(); i++) {
988        temp = (ListNode) points.get(i);
989        if(i==0)
990          bf.append(""+temp.idx);
991        else
992          bf.append(", "+temp.idx);
993      }
994    } catch(Exception ex) { ex.printStackTrace(); }
995    return bf.toString();
996  }
997 
998  /**
999   * Returns the revision string.
1000   *
1001   * @return            the revision
1002   */
1003  public String getRevision() {
1004    return RevisionUtils.extract("$Revision: 5987 $");
1005  }
1006 
1007  /**
1008   * Temp class to represent either a leaf node or an internal node. Should only
1009   * have two children (could be the case one child is an instance and the
1010   * other another node). Primarily used for anchor nodes. It stores the
1011   * points contained in a node together with their distances to the
1012   * node's centre/anchor point.
1013   *
1014   * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
1015   * @version $Revision: 5987 $
1016   */
1017  protected class TempNode
1018    implements RevisionHandler {
1019   
1020    /** The anchor point of the node. */
1021    Instance anchor;
1022   
1023    /** The index of the anchor point. */
1024    int idx;
1025   
1026    /** The radius of the node. */
1027    double radius;
1028   
1029    /** The list of points inside the node. */
1030    MyIdxList points;
1031   
1032    /** Node's left child. */
1033    TempNode left;
1034
1035    /** Node's right child. */
1036    TempNode right;
1037   
1038    /**
1039     * Returns a string represention of the node.
1040     * @return The string representation of the
1041     * node.
1042     */
1043    public String toString() {
1044      if(points==null || points.length()==0) return idx+"";
1045      StringBuffer bf = new StringBuffer();
1046      try {
1047        bf.append(idx+" p: ");
1048        ListNode temp; 
1049        for(int i=0; i<points.size(); i++) {
1050          temp = (ListNode) points.get(i);
1051          if(i==0)
1052            bf.append(""+temp.idx);
1053          else
1054            bf.append(", "+temp.idx);
1055        }
1056      } catch(Exception ex) { ex.printStackTrace(); }
1057      return bf.toString();
1058    }
1059   
1060    /**
1061     * Returns the revision string.
1062     *
1063     * @return          the revision
1064     */
1065    public String getRevision() {
1066      return RevisionUtils.extract("$Revision: 5987 $");
1067    }
1068  }
1069
1070  /**
1071   * An element of MyIdxList. It stores a points index, and
1072   * its distance to some specific point (usually a node's
1073   * anchor point).
1074   *
1075   * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
1076   * @version $Revision: 5987 $
1077   */
1078  protected class ListNode
1079    implements RevisionHandler {
1080   
1081    /** The index of the point. */
1082    int idx = -1;
1083   
1084    /** The distance of the point to the anchor.*/
1085    double distance = Double.NEGATIVE_INFINITY;
1086   
1087    /**
1088     * Constructor.
1089     * @param i The point's index.
1090     * @param d The point's distance to the
1091     * anchor.
1092     */
1093    public ListNode(int i, double d) {
1094      idx = i;
1095      distance = d;
1096    }
1097   
1098    /**
1099     * Returns the revision string.
1100     *
1101     * @return          the revision
1102     */
1103    public String getRevision() {
1104      return RevisionUtils.extract("$Revision: 5987 $");
1105    }
1106  }
1107 
1108  /**
1109   * Class implementing a list. It stores indices of
1110   * instances/points, together with their distances to a nodes
1111   * centre/pivot/anchor, in a (reverse sorted) list. 
1112   *
1113   * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
1114   * @version $Revision: 5987 $
1115   */
1116  protected class MyIdxList implements Serializable, RevisionHandler {
1117
1118    /** for serialization. */
1119    private static final long serialVersionUID = -2283869109722934927L;   
1120   
1121    /** The array list backing this list */
1122    protected ArrayList<ListNode> m_List;
1123   
1124    /**
1125     * Constructor.
1126     */
1127    public MyIdxList() {
1128      m_List = new ArrayList<ListNode>();
1129    }
1130
1131    /**
1132     * Constructor for given capacity.
1133     */
1134    public MyIdxList(int capacity) {
1135      m_List = new ArrayList<ListNode>(capacity);
1136    }
1137
1138    /**
1139     * Returns the first element in the list.
1140     * @return The list's first element.
1141     */
1142    public ListNode getFirst() {
1143      return m_List.get(0);
1144    }
1145   
1146    /**
1147     * Inserts an element in reverse sorted order in
1148     * the list.
1149     * @param idx The index of the point to insert.
1150     * @param distance The distance of the point to
1151     * a node's anchor (this would be used to
1152     * determine the sort order).
1153     */
1154    public void insertReverseSorted(final int idx, final double distance) {
1155
1156      int i=0;
1157      for (ListNode temp : m_List) {
1158        if(temp.distance < distance)
1159          break;
1160        i++;
1161      }
1162      m_List.add(i, new ListNode(idx, distance));
1163    }
1164   
1165    /**
1166     * Returns an element at the specified index in
1167     * the list.
1168     * @param index The index of the element in the
1169     * list.
1170     * @return The element at the given index.
1171     */
1172    public ListNode get(int index) {
1173      return m_List.get(index);
1174    }
1175   
1176    /**
1177     * Removes an element at the specified index
1178     * from the list.
1179     * @param index The index of the element
1180     * in the list to remove.
1181     */
1182    public void remove(int index) {
1183      m_List.remove(index);
1184    }
1185   
1186    /**
1187     * Returns the size of the list.
1188     * @return The size of the list.
1189     */
1190    public int length() {
1191      return m_List.size();
1192    }
1193   
1194    /**
1195     * Returns the size of the list.
1196     * @return The size of the list.
1197     */
1198    public int size() {
1199      return m_List.size();
1200    }
1201   
1202    /**
1203     * Appends one list at the end of the other.
1204     * @param list1 The list to which the other
1205     * list would be appended.
1206     * @param list2 The list to append to the
1207     * other list.
1208     * @return The new list with list2 appended
1209     * to list1.
1210     */
1211    public MyIdxList append(MyIdxList list1, MyIdxList list2) {
1212      MyIdxList temp = new MyIdxList(list1.size()+list2.size());
1213      temp.m_List.addAll(list1.m_List);
1214      temp.m_List.addAll(list2.m_List);
1215      return temp;
1216    }
1217   
1218    /**
1219     * Checks the sorting of a list.
1220     * @param list The list whose sorting is
1221     * to be checked.
1222     * @throws Exception If the list is not
1223     * in (reverse) sorted order.
1224     */
1225    public void checkSorting(MyIdxList list) throws Exception {
1226      Iterator<ListNode> en = m_List.iterator();
1227      ListNode first=null, second=null;
1228      while(en.hasNext()) {
1229        if(first==null)
1230          first = (ListNode) en.next();
1231        else {
1232          second = (ListNode)en.next();
1233          if(first.distance < second.distance)
1234            throw new Exception("List not sorted correctly." +
1235                                " first.distance: " + first.distance +
1236                                " second.distance: " + second.distance +
1237                                " Please check code.");           
1238        }
1239      }
1240    }
1241   
1242    /**
1243     * Returns the revision string.
1244     *
1245     * @return          the revision
1246     */
1247    public String getRevision() {
1248      return RevisionUtils.extract("$Revision: 5987 $");
1249    }
1250  }
1251}
Note: See TracBrowser for help on using the repository browser.