source: tags/MetisMQIDemo/src/main/java/weka/core/neighboursearch/balltrees/BottomUpConstructor.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 12.6 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * BottomUpConstructor.java
19 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.core.neighboursearch.balltrees;
23
24import weka.core.Instance;
25import weka.core.DenseInstance;
26import weka.core.Instances;
27import weka.core.RevisionHandler;
28import weka.core.RevisionUtils;
29import weka.core.TechnicalInformation;
30import weka.core.TechnicalInformationHandler;
31import weka.core.TechnicalInformation.Field;
32import weka.core.TechnicalInformation.Type;
33
34import java.util.ArrayList;
35
36/**
37 <!-- globalinfo-start -->
38 * The class that constructs a ball tree bottom up.
39 * <p/>
40 <!-- globalinfo-end -->
41 *
42 <!-- technical-bibtex-start -->
43 * BibTeX:
44 * <pre>
45 * &#64;techreport{Omohundro1989,
46 *    author = {Stephen M. Omohundro},
47 *    institution = {International Computer Science Institute},
48 *    month = {December},
49 *    number = {TR-89-063},
50 *    title = {Five Balltree Construction Algorithms},
51 *    year = {1989}
52 * }
53 * </pre>
54 * <p/>
55 <!-- technical-bibtex-end -->
56 *
57 <!-- options-start -->
58 * Valid options are: <p/>
59 *
60 * <pre> -N &lt;value&gt;
61 *  Set maximum number of instances in a leaf node
62 *  (default: 40)</pre>
63 *
64 * <pre> -R
65 *  Set internal nodes' radius to the sum
66 *  of the child balls radii. So that it
67 * contains the child balls.</pre>
68 *
69 <!-- options-end -->
70 *
71 * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
72 * @version $Revision: 5987 $
73 */
74public class BottomUpConstructor
75  extends BallTreeConstructor
76  implements TechnicalInformationHandler {
77 
78  /** for serialization. */
79  private static final long serialVersionUID = 5864250777657707687L;
80
81  /**
82   * Returns a string describing this nearest neighbour search algorithm.
83   *
84   * @return            a description of the algorithm for displaying in the
85   *                    explorer/experimenter gui
86   */
87  public String globalInfo() {
88    return "The class that constructs a ball tree bottom up.";
89  }
90
91  /**
92   * Returns an instance of a TechnicalInformation object, containing detailed
93   * information about the technical background of this class, e.g., paper
94   * reference or book this class is based on.
95   *
96   * @return            the technical information about this class
97   */
98  public TechnicalInformation getTechnicalInformation() {
99    TechnicalInformation result;
100
101    result = new TechnicalInformation(Type.TECHREPORT);
102    result.setValue(Field.AUTHOR, "Stephen M. Omohundro");
103    result.setValue(Field.YEAR, "1989");
104    result.setValue(Field.TITLE, "Five Balltree Construction Algorithms");
105    result.setValue(Field.MONTH, "December");
106    result.setValue(Field.NUMBER, "TR-89-063");
107    result.setValue(Field.INSTITUTION, "International Computer Science Institute");
108
109    return result;
110  }
111
112  /**
113   * Creates a new instance of BottomUpConstructor.
114   */
115  public BottomUpConstructor() {
116  }
117
118  /**
119   * Builds the ball tree bottom up.
120   * @return The root node of the tree.
121   * @throws Exception If there is problem building
122   * the tree.
123   */
124  public BallNode buildTree() throws Exception {
125    ArrayList<TempNode> list = new ArrayList<TempNode>();
126   
127    for(int i=0; i<m_InstList.length; i++) {
128      TempNode n = new TempNode();
129      n.points = new int[1]; n.points[0] = m_InstList[i];
130      n.anchor = m_Instances.instance(m_InstList[i]);
131      n.radius = 0.0;
132      list.add(n);
133    }
134   
135    return mergeNodes(list, 0, m_InstList.length-1, m_InstList);
136  }
137
138  /**
139   * Merges nodes into one top node.
140   * 
141   * @param list List of bottom most nodes (the actual
142   * instances).
143   * @param startIdx The index marking the start of
144   * the portion of master index array containing
145   * instances that need to be merged.
146   * @param endIdx The index marking the end of
147   * the portion of master index array containing
148   * instances that need to be merged.
149   * @param instList The master index array.
150   * @return The root node of the tree resulting
151   * from merging of bottom most nodes.
152   * @throws Exception If there is some problem
153   * merging the nodes.
154   */
155  protected BallNode mergeNodes(ArrayList<TempNode> list, int startIdx, int endIdx, 
156                                int[] instList) throws Exception {
157    double minRadius=Double.POSITIVE_INFINITY, tmpRadius;
158    Instance pivot, minPivot=null; int min1=-1, min2=-1;
159    int [] minInstList=null; int merge=1;
160    TempNode parent;
161   
162    while(list.size() > 1) { //main merging loop
163      System.err.print("merge step: "+merge+++"               \r");
164      minRadius = Double.POSITIVE_INFINITY;
165      min1 = -1; min2 = -1; 
166   
167      for(int i=0; i<list.size(); i++) {
168        TempNode first = (TempNode) list.get(i);
169        for(int j=i+1; j<list.size(); j++) {
170          TempNode second = (TempNode) list.get(j);
171          pivot = calcPivot(first, second, m_Instances);
172          tmpRadius = calcRadius(first, second); 
173          if(tmpRadius < minRadius) {
174            minRadius = tmpRadius; 
175            min1=i; min2=j;
176            minPivot = pivot;
177          }
178        }//end for(j)
179      }//end for(i)
180      parent = new TempNode();
181      parent.left  = (TempNode) list.get(min1);
182      parent.right = (TempNode) list.get(min2);
183      minInstList = new int[parent.left.points.length+parent.right.points.length]; 
184      System.arraycopy(parent.left.points, 0, minInstList, 0, parent.left.points.length);
185      System.arraycopy(parent.right.points, 0, minInstList, parent.left.points.length, 
186                       parent.right.points.length);
187      parent.points = minInstList;
188      parent.anchor = minPivot;
189      parent.radius = BallNode.calcRadius(parent.points, m_Instances, minPivot, m_DistanceFunction);
190      list.remove(min1); list.remove(min2-1);
191      list.add(parent);
192    }//end while
193    System.err.println("");
194    TempNode tmpRoot = (TempNode)list.get(0);
195   
196    if(m_InstList.length != tmpRoot.points.length)
197      throw new Exception("Root nodes instance list is of irregular length. " +
198                          "Please check code.");
199    System.arraycopy(tmpRoot.points, 0, m_InstList, 0, tmpRoot.points.length);
200
201    m_NumNodes = m_MaxDepth = m_NumLeaves = 0;
202    tmpRadius = BallNode.calcRadius(instList, m_Instances, tmpRoot.anchor, m_DistanceFunction);   
203    BallNode node = makeBallTree(tmpRoot, startIdx, endIdx, instList, 0, tmpRadius); 
204   
205    return node;   
206  }
207 
208  /**
209   * Makes ball tree nodes of temp nodes that were used
210   * in the merging process.
211   * @param node The temp root node.
212   * @param startidx The index marking the start of the
213   * portion of master index array containing instances
214   * to be merged.
215   * @param endidx The index marking the end of the
216   * portion of master index array containing instances
217   * to be merged.
218   * @param instList The master index array.
219   * @param depth The depth of the provided temp node.
220   * @param rootRadius The smallest ball enclosing all
221   * data points.
222   * @return The proper top BallTreeNode.
223   * @throws Exception If there is some problem.
224   */
225  protected BallNode makeBallTree(TempNode node, int startidx, int endidx, 
226                                int[] instList, int depth, final double rootRadius) throws Exception {
227    BallNode ball=null;
228    Instance pivot;
229   
230    if(m_MaxDepth < depth)
231      m_MaxDepth = depth;
232   
233    if(node.points.length > m_MaxInstancesInLeaf && 
234       (rootRadius==0 ? false : node.radius/rootRadius >= m_MaxRelLeafRadius) && 
235       node.left!=null && node.right!=null) { //make an internal node
236      ball = new BallNode(
237      startidx, endidx, m_NumNodes, 
238      (pivot=BallNode.calcCentroidPivot(startidx, endidx, instList, m_Instances)),
239      BallNode.calcRadius(startidx, endidx, instList, m_Instances, pivot, 
240                          m_DistanceFunction)
241      );
242      m_NumNodes += 1;
243      ball.m_Left = makeBallTree(node.left, startidx, startidx+node.left.points.length-1, instList, depth+1, rootRadius);
244      ball.m_Right= makeBallTree(node.right, startidx+node.left.points.length, endidx, instList, depth+1, rootRadius);
245    }
246    else { //make a leaf node
247      ball = new BallNode(startidx, endidx, m_NumNodes,       
248      (pivot=BallNode.calcCentroidPivot(startidx, endidx, instList, m_Instances)),
249      BallNode.calcRadius(startidx, endidx, instList, m_Instances, pivot, 
250                          m_DistanceFunction)
251                         );
252      m_NumNodes += 1;
253      m_NumLeaves++;
254    }
255    return ball;
256  }
257 
258  /**
259   * Adds an instance to the ball tree.
260   * @param node The root node of the tree.
261   * @param inst The instance to add to the tree.
262   * @return The new master index array after adding the
263   * instance.
264   * @throws Exception Always as BottomUpConstructor
265   * does not allow addition of instances after batch
266   * construction.
267   */
268 
269  public int[] addInstance(BallNode node, Instance inst) throws Exception {
270    throw new Exception("BottomUpConstruction method does not allow addition " +
271                        "of new Instances.");
272  }
273
274  /**
275   * Calculates the centroid pivot of a node based on its
276   * two child nodes.
277   * @param node1 The first child node.
278   * @param node2 The second child node.
279   * @param insts The instance on which the tree is to be
280   * built.
281   * @return The centre/pivot of the node.
282   * @throws Exception If there is some problem calculating
283   * the centre/pivot of the node.
284   */
285  public Instance calcPivot(TempNode node1, TempNode node2, Instances insts) 
286  throws Exception {
287    int classIdx = m_Instances.classIndex();
288    double[] attrVals = new double[insts.numAttributes()];
289    Instance temp;
290    double anchr1Ratio = (double)node1.points.length / 
291    (node1.points.length+node2.points.length),
292    anchr2Ratio = (double)node2.points.length / 
293    (node1.points.length+node2.points.length);                         
294    for(int k=0; k<node1.anchor.numValues(); k++) {
295      if(node1.anchor.index(k)==classIdx)
296        continue;
297      attrVals[k] += node1.anchor.valueSparse(k)*anchr1Ratio;
298    }
299    for(int k=0; k<node2.anchor.numValues(); k++) {
300      if(node2.anchor.index(k)==classIdx)
301        continue;
302      attrVals[k] += node2.anchor.valueSparse(k)*anchr2Ratio;
303    }
304    temp = new DenseInstance(1.0, attrVals);
305    return temp;
306  }
307 
308  /**
309   * Calculates the radius of a node based on its two
310   * child nodes.
311   * @param n1 The first child node.
312   * @param n2 The second child node.
313   * @return The calculated radius of the the node.
314   * @throws Exception If there is some problem
315   * in calculating the radius.
316   */
317  public double calcRadius(TempNode n1, TempNode n2) throws Exception {
318    Instance a1 = n1.anchor, a2 = n2.anchor;
319    double radius = n1.radius + m_DistanceFunction.distance(a1, a2) + n2.radius;
320    return radius/2;
321  }
322
323  /**
324   * Temp class to represent either a leaf node or an internal node. Should only
325   * have two children (could be the case one child is an instance and the
326   * other another node).
327   *
328   * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
329   * @version $Revision: 5987 $
330   */
331  protected class TempNode
332    implements RevisionHandler {
333   
334    /** The centre/pivot of the node. */
335    Instance anchor;
336    /** The radius of the node. */
337    double radius;
338    /** Indices of the points in the node. */
339    int [] points;
340    /** The node's left child. */
341    TempNode left = null;
342    /** The node's right child. */
343    TempNode right = null;
344   
345    /**
346     * Prints the node.
347     * @return The node as a string.
348     */
349    public String toString() {
350      StringBuffer bf = new StringBuffer();
351      bf.append("p: ");
352      for(int i=0; i<points.length; i++) 
353        if(i!=0)
354          bf.append(", "+points[i]);
355        else
356          bf.append(""+points[i]);
357      return bf.toString();
358    }
359   
360    /**
361     * Returns the revision string.
362     *
363     * @return          the revision
364     */
365    public String getRevision() {
366      return RevisionUtils.extract("$Revision: 5987 $");
367    }
368  }
369 
370  /**
371   * Returns the revision string.
372   *
373   * @return            the revision
374   */
375  public String getRevision() {
376    return RevisionUtils.extract("$Revision: 5987 $");
377  }
378}
Note: See TracBrowser for help on using the repository browser.