source: tags/MetisMQIDemo/src/main/java/weka/core/neighboursearch/balltrees/MedianOfWidestDimension.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 12.7 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * MedianOfWidestDimension.java
19 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.core.neighboursearch.balltrees;
23
24import weka.core.EuclideanDistance;
25import weka.core.Instance;
26import weka.core.Instances;
27import weka.core.Option;
28import weka.core.OptionHandler;
29import weka.core.RevisionUtils;
30import weka.core.TechnicalInformation;
31import weka.core.TechnicalInformationHandler;
32import weka.core.Utils;
33import weka.core.TechnicalInformation.Field;
34import weka.core.TechnicalInformation.Type;
35
36import java.util.Enumeration;
37import java.util.Vector;
38
39/**
40 <!-- globalinfo-start -->
41 * Class that splits a BallNode of a ball tree based on the median value of the widest dimension of the points in the ball. It essentially implements Omohundro's  KD construction algorithm.
42 * <p/>
43 <!-- globalinfo-end -->
44 *
45 <!-- technical-bibtex-start -->
46 * BibTeX:
47 * <pre>
48 * &#64;techreport{Omohundro1989,
49 *    author = {Stephen M. Omohundro},
50 *    institution = {International Computer Science Institute},
51 *    month = {December},
52 *    number = {TR-89-063},
53 *    title = {Five Balltree Construction Algorithms},
54 *    year = {1989}
55 * }
56 * </pre>
57 * <p/>
58 <!-- technical-bibtex-end -->
59 *
60 <!-- options-start -->
61 * Valid options are: <p/>
62 *
63 * <pre> -N
64 *  Normalize dimensions' widths.</pre>
65 *
66 <!-- options-end -->
67 *
68 * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
69 * @version $Revision: 5953 $
70 */
71public class MedianOfWidestDimension
72  extends BallSplitter
73  implements OptionHandler, TechnicalInformationHandler {
74 
75  /** for serialization. */
76  private static final long serialVersionUID = 3054842574468790421L;
77 
78  /**
79   * Should we normalize the widths(ranges) of the dimensions (attributes)
80   * before selecting the widest one.
81   */
82  protected boolean m_NormalizeDimWidths = true;
83 
84  /**
85   * Constructor.
86   */
87  public MedianOfWidestDimension() {
88  }
89 
90  /**
91   * Constructor.
92   * @param instList The master index array.
93   * @param insts The instances on which the tree
94   * is (or is to be) built.
95   * @param e The Euclidean distance function to
96   * use for splitting.
97   */
98  public MedianOfWidestDimension(int[] instList, Instances insts, 
99                                 EuclideanDistance e) {
100    super(instList, insts, e);
101  }
102  /**
103   * Returns a string describing this nearest neighbour search algorithm.
104   *
105   * @return            a description of the algorithm for displaying in the
106   *                    explorer/experimenter gui
107   */
108  public String globalInfo() {
109    return 
110        "Class that splits a BallNode of a ball tree based on the "
111      + "median value of the widest dimension of the points in the ball. "
112      + "It essentially implements Omohundro's  KD construction algorithm.";
113  }
114
115 
116  /**
117   * Returns an instance of a TechnicalInformation object, containing detailed
118   * information about the technical background of this class, e.g., paper
119   * reference or book this class is based on.
120   *
121   * @return            the technical information about this class
122   */
123  public TechnicalInformation getTechnicalInformation() {
124    TechnicalInformation result;
125   
126    result = new TechnicalInformation(Type.TECHREPORT);
127    result.setValue(Field.AUTHOR, "Stephen M. Omohundro");
128    result.setValue(Field.YEAR, "1989");
129    result.setValue(Field.TITLE, "Five Balltree Construction Algorithms");
130    result.setValue(Field.MONTH, "December");
131    result.setValue(Field.NUMBER, "TR-89-063");
132    result.setValue(Field.INSTITUTION, "International Computer Science Institute");
133
134    return result;
135  }
136 
137  /**
138   * Splits a ball into two.
139   * @param node The node to split.
140   * @param numNodesCreated The number of nodes that so far have been
141   * created for the tree, so that the newly created nodes are
142   * assigned correct/meaningful node numbers/ids.
143   * @throws Exception If there is some problem in splitting the
144   * given node.
145   */
146  public void splitNode(BallNode node, int numNodesCreated) throws Exception {
147    correctlyInitialized();
148    //int[] instList = getNodesInstsList(node);
149    double[][] ranges = m_DistanceFunction.initializeRanges(m_Instlist, 
150                                                            node.m_Start, 
151                                                            node.m_End);
152   
153    int splitAttrib = widestDim(ranges, m_DistanceFunction.getRanges());
154   
155    //In this case median is defined to be either the middle value (in case of
156    //odd number of values) or the left of the two middle values (in case of
157    //even number of values).
158    int medianIdxIdx = node.m_Start + (node.m_End-node.m_Start)/2;
159    //the following finds the median and also re-arranges the array so all
160    //elements to the left are < median and those to the right are > median.
161    int medianIdx = select(splitAttrib, m_Instlist, node.m_Start, node.m_End, (node.m_End-node.m_Start)/2+1); //Utils.select(array, indices, node.m_Start, node.m_End, (node.m_End-node.m_Start)/2+1); //(int) (node.m_NumInstances/2D+0.5D);
162
163    Instance pivot;
164   
165    node.m_SplitAttrib = splitAttrib;
166    node.m_SplitVal = m_Instances.instance(m_Instlist[medianIdx])
167                                            .value(splitAttrib);
168
169    node.m_Left = new BallNode(node.m_Start, medianIdxIdx, numNodesCreated+1,
170                              (pivot=BallNode.calcCentroidPivot(node.m_Start,
171                                                       medianIdxIdx, m_Instlist, 
172                                                       m_Instances)), 
173                              BallNode.calcRadius(node.m_Start, medianIdxIdx, 
174                                                  m_Instlist, m_Instances, 
175                                                  pivot, m_DistanceFunction)
176                              );
177    node.m_Right = new BallNode(medianIdxIdx+1, node.m_End, numNodesCreated+2,
178                              (pivot=BallNode.calcCentroidPivot(medianIdxIdx+1,
179                                                       node.m_End, m_Instlist, 
180                                                       m_Instances)), 
181                              BallNode.calcRadius(medianIdxIdx+1, node.m_End, 
182                                                  m_Instlist, m_Instances, 
183                                                  pivot, m_DistanceFunction)
184                              );
185  }
186
187  /**
188   * Partitions the instances around a pivot. Used by quicksort and
189   * kthSmallestValue.
190   *
191   * @param attIdx The attribution/dimension based on which the
192   * instances should be partitioned.
193   * @param index The master index array containing indices of the
194   * instances.
195   * @param l The begining index of the portion of master index
196   * array that should be partitioned.
197   * @param r The end index of the portion of master index array
198   * that should be partitioned.
199   * @return the index of the middle element (in the master
200   * index array, i.e. index of the index of middle element).
201   */
202  protected int partition(int attIdx, int[] index, int l, int r) {
203   
204    double pivot = m_Instances.instance(index[(l + r) / 2]).value(attIdx);
205    int help;
206
207    while (l < r) {
208      while ((m_Instances.instance(index[l]).value(attIdx) < pivot) && (l < r)) {
209        l++;
210      }
211      while ((m_Instances.instance(index[r]).value(attIdx) > pivot) && (l < r)) {
212        r--;
213      }
214      if (l < r) {
215        help = index[l];
216        index[l] = index[r];
217        index[r] = help;
218        l++;
219        r--;
220      }
221    }
222    if ((l == r) && (m_Instances.instance(index[r]).value(attIdx) > pivot)) {
223      r--;
224    } 
225
226    return r;
227  }
228
229  /**
230   * Implements computation of the kth-smallest element according
231   * to Manber's "Introduction to Algorithms".
232   *
233   * @param attIdx The dimension/attribute of the instances in
234   * which to find the kth-smallest element.
235   * @param indices The master index array containing indices of
236   * the instances.
237   * @param left The begining index of the portion of the master
238   * index array in which to find the kth-smallest element.
239   * @param right The end index of the portion of the master index
240   * array in which to find the kth-smallest element.
241   * @param k The value of k
242   * @return The index of the kth-smallest element
243   */
244  public int select(int attIdx, int[] indices, 
245                            int left, int right, int k) {
246   
247    if (left == right) {
248      return left;
249    } else {
250      int middle = partition(attIdx, indices, left, right);
251      if ((middle - left + 1) >= k) {
252        return select(attIdx, indices, left, middle, k);
253      } else {
254        return select(attIdx, indices, middle + 1, right, k - (middle - left + 1));
255      }
256    }
257  }
258 
259  /**
260   * Returns the widest dimension. The width of each
261   * dimension (for the points inside the node) is
262   * normalized, if m_NormalizeNodeWidth is set to
263   * true.
264   * @param nodeRanges The attributes' range of the
265   * points inside the node that is to be split.
266   * @param universe The attributes' range for the
267   * whole point-space.
268   * @return The index of the attribute/dimension
269   * in which the points of the node have widest
270   * spread.
271   */
272  protected int widestDim(double[][] nodeRanges, double[][] universe) {
273    final int classIdx = m_Instances.classIndex();
274    double widest = 0.0;
275    int w = -1;
276    if (m_NormalizeDimWidths) {
277        for (int i = 0; i < nodeRanges.length; i++) {
278          double newWidest = nodeRanges[i][m_DistanceFunction.R_WIDTH] / 
279                             universe[i][m_DistanceFunction.R_WIDTH];
280          if (newWidest > widest) {
281            if(i == classIdx) continue;
282            widest = newWidest;
283            w = i;
284          }
285        }
286    }
287    else {
288        for (int i = 0; i < nodeRanges.length; i++) {
289          if (nodeRanges[i][m_DistanceFunction.R_WIDTH] > widest) {
290            if(i == classIdx) continue;
291            widest = nodeRanges[i][m_DistanceFunction.R_WIDTH];
292            w = i;
293          }
294        }
295    }
296    return w;
297  } 
298 
299  /**
300   * Returns the tip text for this property.
301   *
302   * @return            tip text for this property suitable for
303   *                    displaying in the explorer/experimenter gui
304   */
305  public String normalizeDimWidthsTipText() {
306    return 
307        "Whether to normalize the widths(ranges) of the dimensions "
308      + "(attributes) before selecting the widest one.";
309  }
310 
311  /**
312   * Should we normalize the widths(ranges) of the dimensions (attributes)
313   * before selecting the widest one.
314   * @param normalize Should be true if the widths are to be
315   * normalized.
316   */
317  public void setNormalizeDimWidths(boolean normalize) {
318    m_NormalizeDimWidths = normalize;
319  }
320 
321  /**
322   * Whether we are normalizing the widths(ranges) of the dimensions (attributes)
323   * or not.
324   * @return true if widths are being normalized.
325   */
326  public boolean getNormalizeDimWidths() {
327    return m_NormalizeDimWidths;
328  }
329 
330  /**
331   * Returns an enumeration describing the available options.
332   *
333   * @return            an enumeration of all the available options.
334   */
335  public Enumeration listOptions() {
336    Vector<Option> newVector = new Vector<Option>();
337   
338    newVector.addElement(new Option(
339        "\tNormalize dimensions' widths.",
340        "N", 0, "-N"));
341   
342    return newVector.elements();
343  }
344
345  /**
346   * Parses a given list of options.
347   *
348   <!-- options-start -->
349   * Valid options are: <p/>
350   *
351   * <pre> -N
352   *  Normalize dimensions' widths.</pre>
353   *
354   <!-- options-end -->
355   *
356   * @param options the list of options as an array of strings
357   * @throws Exception if an option is not supported
358   */
359  public void setOptions(String[] options)
360    throws Exception {
361   
362    setNormalizeDimWidths(Utils.getFlag('N', options));
363  }
364
365  /**
366   * Gets the current settings.
367   * @return An array of strings suitable for passing to
368   * setOptions or to be displayed by a
369   * GenericObjectEditor.
370   */
371  public String[] getOptions() {
372    Vector<String>      result;
373   
374    result = new Vector<String>();
375
376    if (getNormalizeDimWidths())
377      result.add("-N");
378   
379    return result.toArray(new String[result.size()]);
380  }
381 
382  /**
383   * Returns the revision string.
384   *
385   * @return            the revision
386   */
387  public String getRevision() {
388    return RevisionUtils.extract("$Revision: 5953 $");
389  }
390}
Note: See TracBrowser for help on using the repository browser.