source: tags/MetisMQIDemo/src/main/java/weka/core/neighboursearch/balltrees/MedianDistanceFromArbitraryPoint.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 12.7 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * MedianDistanceFromArbitraryPoint.java
19 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.core.neighboursearch.balltrees;
23
24import weka.core.EuclideanDistance;
25import weka.core.Instance;
26import weka.core.Instances;
27import weka.core.Option;
28import weka.core.RevisionUtils;
29import weka.core.TechnicalInformation;
30import weka.core.TechnicalInformationHandler;
31import weka.core.Utils;
32import weka.core.TechnicalInformation.Field;
33import weka.core.TechnicalInformation.Type;
34
35import java.util.Enumeration;
36import java.util.Random;
37import java.util.Vector;
38
39/**
40 <!-- globalinfo-start -->
41 * Class that splits a BallNode of a ball tree using Uhlmann's described method.<br/>
42 * <br/>
43 * For information see:<br/>
44 * <br/>
45 * Jeffrey K. Uhlmann (1991). Satisfying general proximity/similarity queries with metric trees. Information Processing Letters. 40(4):175-179.<br/>
46 * <br/>
47 * Ashraf Masood Kibriya (2007). Fast Algorithms for Nearest Neighbour Search. Hamilton, New Zealand.
48 * <p/>
49 <!-- globalinfo-end -->
50 *
51 <!-- technical-bibtex-start -->
52 * BibTeX:
53 * <pre>
54 * &#64;article{Uhlmann1991,
55 *    author = {Jeffrey K. Uhlmann},
56 *    journal = {Information Processing Letters},
57 *    month = {November},
58 *    number = {4},
59 *    pages = {175-179},
60 *    title = {Satisfying general proximity/similarity queries with metric trees},
61 *    volume = {40},
62 *    year = {1991}
63 * }
64 *
65 * &#64;mastersthesis{Kibriya2007,
66 *    address = {Hamilton, New Zealand},
67 *    author = {Ashraf Masood Kibriya},
68 *    school = {Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato},
69 *    title = {Fast Algorithms for Nearest Neighbour Search},
70 *    year = {2007}
71 * }
72 * </pre>
73 * <p/>
74 <!-- technical-bibtex-end -->
75 *
76 <!-- options-start -->
77 * Valid options are: <p/>
78 *
79 * <pre> -S &lt;num&gt;
80 *  The seed value for the random number generator.
81 *  (default: 17)</pre>
82 *
83 <!-- options-end -->
84 *
85 * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
86 * @version $Revision: 5953 $
87 */
88public class MedianDistanceFromArbitraryPoint
89  extends BallSplitter
90  implements TechnicalInformationHandler {
91 
92  /** for serialization. */
93  private static final long serialVersionUID = 5617378551363700558L;
94 
95  /** Seed for random number generator. */
96  protected int m_RandSeed = 17;
97
98  /**
99   * Random number generator for selecting
100   * an abitrary (random) point.
101   */
102  protected Random m_Rand;
103 
104  /** Constructor. */
105  public MedianDistanceFromArbitraryPoint() {
106  }
107 
108  /**
109   * Constructor.
110   * @param instList The master index array.
111   * @param insts The instances on which the tree
112   * is (or is to be) built.
113   * @param e The Euclidean distance function to
114   * use for splitting.
115   */
116  public MedianDistanceFromArbitraryPoint(int[] instList, Instances insts, EuclideanDistance e) {
117    super(instList, insts, e);
118  }
119 
120  /**
121   * Returns a string describing this nearest neighbour search algorithm.
122   *
123   * @return            a description of the algorithm for displaying in the
124   *                    explorer/experimenter gui
125   */
126  public String globalInfo() {
127    return 
128        "Class that splits a BallNode of a ball tree using Uhlmann's "
129      + "described method.\n\n"
130      + "For information see:\n\n"
131      + getTechnicalInformation().toString();
132  }
133
134  /**
135   * Returns an instance of a TechnicalInformation object, containing detailed
136   * information about the technical background of this class, e.g., paper
137   * reference or book this class is based on.
138   *
139   * @return            the technical information about this class
140   */
141  public TechnicalInformation getTechnicalInformation() {
142    TechnicalInformation result;
143    TechnicalInformation additional;
144
145    result = new TechnicalInformation(Type.ARTICLE);
146    result.setValue(Field.AUTHOR, "Jeffrey K. Uhlmann");
147    result.setValue(Field.TITLE, "Satisfying general proximity/similarity queries with metric trees");
148    result.setValue(Field.JOURNAL, "Information Processing Letters");
149    result.setValue(Field.MONTH, "November");
150    result.setValue(Field.YEAR, "1991");
151    result.setValue(Field.NUMBER, "4");
152    result.setValue(Field.VOLUME, "40");
153    result.setValue(Field.PAGES, "175-179");
154
155    additional = result.add(Type.MASTERSTHESIS);
156    additional.setValue(Field.AUTHOR, "Ashraf Masood Kibriya");
157    additional.setValue(Field.TITLE, "Fast Algorithms for Nearest Neighbour Search");
158    additional.setValue(Field.YEAR, "2007");
159    additional.setValue(Field.SCHOOL, "Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato");
160    additional.setValue(Field.ADDRESS, "Hamilton, New Zealand");
161   
162    return result;
163  }
164
165  /**
166   * Returns an enumeration describing the available options.
167   *
168   * @return            an enumeration of all the available options.
169   */
170  public Enumeration listOptions() {
171    Vector<Option> result = new Vector<Option>();
172   
173    Enumeration enm = super.listOptions();
174    while (enm.hasMoreElements())
175      result.addElement((Option)enm.nextElement());
176     
177    result.addElement(new Option(
178        "\tThe seed value for the random number generator.\n"
179        + "\t(default: 17)",
180        "S", 1, "-S <num>"));
181   
182    return result.elements();
183  }
184
185  /**
186   * Parses a given list of options.
187   *
188   <!-- options-start -->
189   * Valid options are: <p/>
190   *
191   * <pre> -S &lt;num&gt;
192   *  The seed value for the random number generator.
193   *  (default: 17)</pre>
194   *
195   <!-- options-end -->
196   *
197   * @param options     the list of options as an array of strings
198   * @throws Exception  if an option is not supported
199   */
200  public void setOptions(String[] options) throws Exception {
201    String      tmpStr;
202   
203    super.setOptions(options);
204
205    tmpStr = Utils.getOption('S', options);
206    if (tmpStr.length() > 0)
207      setRandomSeed(Integer.parseInt(tmpStr));
208    else
209      setRandomSeed(17);
210  }
211
212  /**
213   * Gets the current settings of the object.
214   *
215   * @return            an array of strings suitable for passing to setOptions
216   */
217  public String[] getOptions() {
218    Vector<String>      result;
219    String[]            options;
220    int                 i;
221   
222    result  = new Vector<String>();
223
224    options = super.getOptions();
225    for (i = 0; i < options.length; i++)
226      result.add(options[i]);
227   
228    result.add("-S");
229    result.add("" + getRandomSeed());
230   
231    return result.toArray(new String[result.size()]);
232  }
233 
234  /**
235   * Sets the seed for random number generator.
236   * @param seed The seed value to set.
237   */
238  public void setRandomSeed(int seed) {
239    m_RandSeed = seed;
240  }
241 
242  /**
243   * Returns the seed value of random
244   * number generator.
245   * @return The random seed currently in use.
246   */
247  public int getRandomSeed() {
248    return m_RandSeed;
249  }
250 
251  /**
252   * Returns the tip text for this property.
253   *
254   * @return tip text for this property suitable for
255   * displaying in the explorer/experimenter gui.
256   */
257  public String randomSeedTipText() {
258    return "The seed value for the random number generator.";
259  }
260 
261  /**
262   * Splits a ball into two.
263   * @param node The node to split.
264   * @param numNodesCreated The number of nodes that so far have been
265   * created for the tree, so that the newly created nodes are
266   * assigned correct/meaningful node numbers/ids.
267   * @throws Exception If there is some problem in splitting the
268   * given node.
269   */
270  public void splitNode(BallNode node, int numNodesCreated) throws Exception {
271    correctlyInitialized();
272
273    m_Rand = new Random(m_RandSeed);
274   
275    int ridx = node.m_Start+m_Rand.nextInt(node.m_NumInstances);
276    Instance randomInst = (Instance)
277                            m_Instances.instance( m_Instlist[ridx] ).copy();
278    double [] distList = new double[node.m_NumInstances-1];
279    Instance temp;
280    for(int i=node.m_Start, j=0; i<node.m_End; i++, j++) {
281      temp = m_Instances.instance( m_Instlist[i] );
282      distList[j] = m_DistanceFunction.distance(randomInst, temp, 
283          Double.POSITIVE_INFINITY);
284    }
285   
286    int medianIdx = select(distList, m_Instlist, 0, distList.length-1, 
287                           node.m_Start, (node.m_End-node.m_Start)/2+1) + 
288                    node.m_Start;
289   
290    Instance pivot;
291    node.m_Left = new BallNode(node.m_Start, medianIdx, numNodesCreated+1,
292                              (pivot=BallNode.calcCentroidPivot(node.m_Start,
293                                                       medianIdx, m_Instlist, 
294                                                       m_Instances)), 
295                              BallNode.calcRadius(node.m_Start, medianIdx, 
296                                                  m_Instlist, m_Instances, 
297                                                  pivot, m_DistanceFunction)
298                              );
299   
300    node.m_Right = new BallNode(medianIdx+1, node.m_End, numNodesCreated+2,
301                              (pivot=BallNode.calcCentroidPivot(medianIdx+1,
302                                                       node.m_End, m_Instlist, 
303                                                       m_Instances)), 
304                              BallNode.calcRadius(medianIdx+1, node.m_End, 
305                                                  m_Instlist, m_Instances, 
306                                                  pivot, m_DistanceFunction)
307                              );
308  }
309 
310  /**
311   * Partitions the instances around a pivot. Used by quicksort and
312   * kthSmallestValue.
313   *
314   * @param array The array of distances of the points to the
315   * arbitrarily selected point.
316   * @param index The master index array containing indices of the
317   * instances.
318   * @param l The relative begining index of the portion of master
319   * index array that should be partitioned.
320   * @param r The relative end index of the portion of master index
321   * array that should be partitioned.
322   * @param indexStart The absolute begining index of the portion
323   * of master index array that should be partitioned.
324   * @return the index of the middle element (in the master
325   * index array, i.e. index of the index of middle element).
326   */
327  protected int partition(double[] array, int[] index, int l, int r, 
328                        final int indexStart) {
329   
330    double pivot = array[(l + r) / 2];
331    int help;
332
333    while (l < r) {
334      while ((array[l] < pivot) && (l < r)) {
335        l++;
336      }
337      while ((array[r] > pivot) && (l < r)) {
338        r--;
339      }
340      if (l < r) {
341        help = index[indexStart+l];
342        index[indexStart+l] = index[indexStart+r];
343        index[indexStart+r] = help;
344        l++;
345        r--;
346      }
347    }
348    if ((l == r) && (array[r] > pivot)) {
349      r--;
350    } 
351
352    return r;
353  }
354 
355  /**
356   * Implements computation of the kth-smallest element according
357   * to Manber's "Introduction to Algorithms".
358   *
359   * @param array Array containing the distances of points from
360   * the arbitrarily selected.
361   * @param indices The master index array containing indices of
362   * the instances.
363   * @param left The relative begining index of the portion of the
364   * master index array in which to find the kth-smallest element.
365   * @param right The relative end index of the portion of the
366   * master index array in which to find the kth-smallest element.
367   * @param indexStart The absolute begining index of the portion
368   * of the master index array in which to find the kth-smallest
369   * element.
370   * @param k The value of k
371   * @return The index of the kth-smallest element
372   */
373  protected int select(double[] array, int[] indices, 
374                            int left, int right, final int indexStart, int k) {
375   
376    if (left == right) {
377      return left;
378    } else {
379      int middle = partition(array, indices, left, right, indexStart);
380      if ((middle - left + 1) >= k) {
381        return select(array, indices, left, middle, indexStart, k);
382      } else {
383        return select(array, indices, middle + 1, right, 
384                      indexStart, k - (middle - left + 1));
385      }
386    }
387  }
388 
389  /**
390   * Returns the revision string.
391   *
392   * @return            the revision
393   */
394  public String getRevision() {
395    return RevisionUtils.extract("$Revision: 5953 $");
396  }
397}
Note: See TracBrowser for help on using the repository browser.