source: branches/MetisMQI/src/main/java/weka/core/neighboursearch/kdtrees/MedianOfWidestDimension.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 8.0 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * MedianOfWidestDimension.java
19 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.core.neighboursearch.kdtrees;
23
24import weka.core.RevisionUtils;
25import weka.core.TechnicalInformation;
26import weka.core.TechnicalInformationHandler;
27import weka.core.TechnicalInformation.Field;
28import weka.core.TechnicalInformation.Type;
29
30/**
31 <!-- globalinfo-start -->
32 * The class that splits a KDTree node based on the median value of a dimension in which the node's points have the widest spread.<br/>
33 * <br/>
34 * For more information see also:<br/>
35 * <br/>
36 * Jerome H. Friedman, Jon Luis Bentley, Raphael Ari Finkel (1977). An Algorithm for Finding Best Matches in Logarithmic Expected Time. ACM Transactions on Mathematics Software. 3(3):209-226.
37 * <p/>
38 <!-- globalinfo-end -->
39 *
40 <!-- technical-bibtex-start -->
41 * BibTeX:
42 * <pre>
43 * &#64;article{Friedman1977,
44 *    author = {Jerome H. Friedman and Jon Luis Bentley and Raphael Ari Finkel},
45 *    journal = {ACM Transactions on Mathematics Software},
46 *    month = {September},
47 *    number = {3},
48 *    pages = {209-226},
49 *    title = {An Algorithm for Finding Best Matches in Logarithmic Expected Time},
50 *    volume = {3},
51 *    year = {1977}
52 * }
53 * </pre>
54 * <p/>
55 <!-- technical-bibtex-end -->
56 *
57 <!-- options-start -->
58 <!-- options-end -->
59 *
60 * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
61 * @version $Revision: 5953 $
62 */
63public class MedianOfWidestDimension
64  extends KDTreeNodeSplitter
65  implements TechnicalInformationHandler {
66
67  /** for serialization. */
68  private static final long serialVersionUID = 1383443320160540663L;
69
70  /**
71   * Returns a string describing this nearest neighbour search algorithm.
72   *
73   * @return            a description of the algorithm for displaying in the
74   *                    explorer/experimenter gui
75   */
76  public String globalInfo() {
77    return 
78        "The class that splits a KDTree node based on the median value of "
79      + "a dimension in which the node's points have the widest spread.\n\n"
80      + "For more information see also:\n\n"
81      + getTechnicalInformation().toString();
82  }
83
84  /**
85   * Returns an instance of a TechnicalInformation object, containing detailed
86   * information about the technical background of this class, e.g., paper
87   * reference or book this class is based on.
88   *
89   * @return            the technical information about this class
90   */
91  public TechnicalInformation getTechnicalInformation() {
92    TechnicalInformation result;
93
94    result = new TechnicalInformation(Type.ARTICLE);
95    result.setValue(Field.AUTHOR, "Jerome H. Friedman and Jon Luis Bentley and Raphael Ari Finkel");
96    result.setValue(Field.YEAR, "1977");
97    result.setValue(Field.TITLE, "An Algorithm for Finding Best Matches in Logarithmic Expected Time");
98    result.setValue(Field.JOURNAL, "ACM Transactions on Mathematics Software");
99    result.setValue(Field.PAGES, "209-226");
100    result.setValue(Field.MONTH, "September");
101    result.setValue(Field.VOLUME, "3");
102    result.setValue(Field.NUMBER, "3");
103
104    return result;
105  }
106 
107  /**
108   * Splits a node into two based on the median value of the dimension
109   * in which the points have the widest spread. After splitting two
110   * new nodes are created and correctly initialised. And, node.left
111   * and node.right are set appropriately.
112   *
113   * @param node The node to split.
114   * @param numNodesCreated The number of nodes that so far have been
115   * created for the tree, so that the newly created nodes are
116   * assigned correct/meaningful node numbers/ids.
117   * @param nodeRanges The attributes' range for the points inside
118   * the node that is to be split.
119   * @param universe The attributes' range for the whole
120   * point-space.
121   * @throws Exception If there is some problem in splitting the
122   * given node.
123   */
124  public void splitNode(KDTreeNode node, int numNodesCreated, 
125                        double[][] nodeRanges, double[][] universe) throws Exception {
126   
127    correctlyInitialized();
128   
129    int splitDim = widestDim(nodeRanges, universe);
130   
131    //In this case median is defined to be either the middle value (in case of
132    //odd number of values) or the left of the two middle values (in case of
133    //even number of values).
134    int medianIdxIdx = node.m_Start + (node.m_End-node.m_Start)/2;
135    //the following finds the median and also re-arranges the array so all
136    //elements to the left are < median and those to the right are > median.
137    int medianIdx = select(splitDim, m_InstList, node.m_Start, node.m_End, (node.m_End-node.m_Start)/2+1);
138   
139   
140    node.m_SplitDim = splitDim;
141    node.m_SplitValue = m_Instances.instance(m_InstList[medianIdx]).value(splitDim);
142   
143    node.m_Left  = new KDTreeNode(numNodesCreated+1, node.m_Start, medianIdxIdx,
144        m_EuclideanDistance.initializeRanges(m_InstList, node.m_Start, medianIdxIdx));
145    node.m_Right = new KDTreeNode(numNodesCreated+2, medianIdxIdx+1, node.m_End,
146        m_EuclideanDistance.initializeRanges(m_InstList, medianIdxIdx+1, node.m_End)); 
147  }
148 
149  /**
150   * Partitions the instances around a pivot. Used by quicksort and
151   * kthSmallestValue.
152   *
153   * @param attIdx The attribution/dimension based on which the
154   * instances should be partitioned.
155   * @param index The master index array containing indices of the
156   * instances.
157   * @param l The begining index of the portion of master index
158   * array that should be partitioned.
159   * @param r The end index of the portion of master index array
160   * that should be partitioned.
161   * @return the index of the middle element
162   */
163  protected int partition(int attIdx, int[] index, int l, int r) {
164   
165    double pivot = m_Instances.instance(index[(l + r) / 2]).value(attIdx);
166    int help;
167
168    while (l < r) {
169      while ((m_Instances.instance(index[l]).value(attIdx) < pivot) && (l < r)) {
170        l++;
171      }
172      while ((m_Instances.instance(index[r]).value(attIdx) > pivot) && (l < r)) {
173        r--;
174      }
175      if (l < r) {
176        help = index[l];
177        index[l] = index[r];
178        index[r] = help;
179        l++;
180        r--;
181      }
182    }
183    if ((l == r) && (m_Instances.instance(index[r]).value(attIdx) > pivot)) {
184      r--;
185    } 
186
187    return r;
188  }
189
190  /**
191   * Implements computation of the kth-smallest element according
192   * to Manber's "Introduction to Algorithms".
193   *
194   * @param attIdx The dimension/attribute of the instances in
195   * which to find the kth-smallest element.
196   * @param indices The master index array containing indices of
197   * the instances.
198   * @param left The begining index of the portion of the master
199   * index array in which to find the kth-smallest element.
200   * @param right The end index of the portion of the master index
201   * array in which to find the kth-smallest element.
202   * @param k The value of k
203   * @return The index of the kth-smallest element
204   */
205  public int select(int attIdx, int[] indices, int left, int right, int k) {
206   
207    if (left == right) {
208      return left;
209    } else {
210      int middle = partition(attIdx, indices, left, right);
211      if ((middle - left + 1) >= k) {
212        return select(attIdx, indices, left, middle, k);
213      } else {
214        return select(attIdx, indices, middle + 1, right, k - (middle - left + 1));
215      }
216    }
217  }
218 
219  /**
220   * Returns the revision string.
221   *
222   * @return            the revision
223   */
224  public String getRevision() {
225    return RevisionUtils.extract("$Revision: 5953 $");
226  }
227}
Note: See TracBrowser for help on using the repository browser.