source: branches/MetisMQI/src/main/java/weka/core/neighboursearch/kdtrees/SlidingMidPointOfWidestSide.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 10.0 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * SlidingMidPointOfWidestSide.java
19 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.core.neighboursearch.kdtrees;
23
24import weka.core.RevisionUtils;
25import weka.core.TechnicalInformation;
26import weka.core.TechnicalInformationHandler;
27import weka.core.TechnicalInformation.Field;
28import weka.core.TechnicalInformation.Type;
29
30/**
31 <!-- globalinfo-start -->
32 * The class that splits a node into two based on the midpoint value of the dimension in which the node's rectangle is widest. If after splitting one side is empty then it is slided towards the non-empty side until there is at least one point on the empty side.<br/>
33 * <br/>
34 * For more information see also:<br/>
35 * <br/>
36 * David M. Mount (2006). ANN Programming Manual. College Park, MD, USA.
37 * <p/>
38 <!-- globalinfo-end -->
39 *
40 <!-- technical-bibtex-start -->
41 * BibTeX:
42 * <pre>
43 * &#64;manual{Mount2006,
44 *    address = {College Park, MD, USA},
45 *    author = {David M. Mount},
46 *    organization = {Department of Computer Science, University of Maryland},
47 *    title = {ANN Programming Manual},
48 *    year = {2006},
49 *    HTTP = {Available from http://www.cs.umd.edu/\~mount/ANN/}
50 * }
51 * </pre>
52 * <p/>
53 <!-- technical-bibtex-end -->
54 *
55 <!-- options-start -->
56 <!-- options-end -->
57 *
58 * @author  Ashraf M. Kibriya (amk14@waikato.ac.nz)
59 * @version $Revision: 5953 $
60 */
61public class SlidingMidPointOfWidestSide
62  extends KDTreeNodeSplitter
63  implements TechnicalInformationHandler {
64
65  /** for serialization. */
66  private static final long serialVersionUID = 852857628205680562L;
67
68  /** The floating point error to tolerate in finding the widest
69   * rectangular side. */
70  protected static double ERR = 0.001;
71
72  /**
73   * Returns a string describing this nearest neighbour search algorithm.
74   *
75   * @return            a description of the algorithm for displaying in the
76   *                    explorer/experimenter gui
77   */
78  public String globalInfo() {
79    return 
80        "The class that splits a node into two based on the midpoint value of "
81      + "the dimension in which the node's rectangle is widest. If after "
82      + "splitting one side is empty then it is slided towards the non-empty "
83      + "side until there is at least one point on the empty side.\n\n"
84      + "For more information see also:\n\n"
85      + getTechnicalInformation().toString();
86  }
87 
88  /**
89   * Returns an instance of a TechnicalInformation object, containing detailed
90   * information about the technical background of this class, e.g., paper
91   * reference or book this class is based on.
92   *
93   * @return            the technical information about this class
94   */
95  public TechnicalInformation getTechnicalInformation() {
96    TechnicalInformation result;
97
98    result = new TechnicalInformation(Type.MANUAL);
99    result.setValue(Field.AUTHOR, "David M. Mount");
100    result.setValue(Field.YEAR, "2006");
101    result.setValue(Field.TITLE, "ANN Programming Manual");
102    result.setValue(Field.ORGANIZATION, "Department of Computer Science, University of Maryland");
103    result.setValue(Field.ADDRESS,
104        "College Park, MD, USA");
105    result.setValue(Field.HTTP,
106        "Available from http://www.cs.umd.edu/~mount/ANN/");
107
108    return result;
109  }
110
111  /**
112   * Splits a node into two based on the midpoint value of the dimension
113   * in which the node's rectangle is widest. If after splitting one side
114   * is empty then it is slided towards the non-empty side until there is
115   * at least one point on the empty side. The two nodes created after the
116   * whole splitting are correctly initialised. And, node.left and
117   * node.right are set appropriately. 
118   * @param node The node to split.
119   * @param numNodesCreated The number of nodes that so far have been
120   * created for the tree, so that the newly created nodes are
121   * assigned correct/meaningful node numbers/ids.
122   * @param nodeRanges The attributes' range for the points inside
123   * the node that is to be split.
124   * @param universe The attributes' range for the whole
125   * point-space.
126   * @throws Exception If there is some problem in splitting the
127   * given node.
128   */
129  public void splitNode(KDTreeNode node, int numNodesCreated,
130      double[][] nodeRanges, double[][] universe) throws Exception {
131
132    correctlyInitialized();
133
134    if (node.m_NodesRectBounds == null) {
135      node.m_NodesRectBounds = new double[2][node.m_NodeRanges.length];
136      for (int i = 0; i < node.m_NodeRanges.length; i++) {
137        node.m_NodesRectBounds[MIN][i] = node.m_NodeRanges[i][MIN];
138        node.m_NodesRectBounds[MAX][i] = node.m_NodeRanges[i][MAX];
139      }
140    }
141
142    // finding widest side of the hyper rectangle
143    double maxRectWidth = Double.NEGATIVE_INFINITY, maxPtWidth = Double.NEGATIVE_INFINITY, tempval;
144    int splitDim = -1, classIdx = m_Instances.classIndex();
145
146    for (int i = 0; i < node.m_NodesRectBounds[0].length; i++) {
147      if (i == classIdx)
148        continue;
149      tempval = node.m_NodesRectBounds[MAX][i] - node.m_NodesRectBounds[MIN][i];
150      if (m_NormalizeNodeWidth) {
151        tempval = tempval / universe[i][WIDTH];
152      }
153      if (tempval > maxRectWidth && node.m_NodeRanges[i][WIDTH] > 0.0)
154        maxRectWidth = tempval;
155    }
156
157    for (int i = 0; i < node.m_NodesRectBounds[0].length; i++) {
158      if (i == classIdx)
159        continue;
160      tempval = node.m_NodesRectBounds[MAX][i] - node.m_NodesRectBounds[MIN][i];
161      if (m_NormalizeNodeWidth) {
162        tempval = tempval / universe[i][WIDTH];
163      }
164      if (tempval >= maxRectWidth * (1 - ERR)
165          && node.m_NodeRanges[i][WIDTH] > 0.0) {
166        if (node.m_NodeRanges[i][WIDTH] > maxPtWidth) {
167          maxPtWidth = node.m_NodeRanges[i][WIDTH];
168          if (m_NormalizeNodeWidth)
169            maxPtWidth = maxPtWidth / universe[i][WIDTH];
170          splitDim = i;
171        }
172      }
173    }
174
175    double splitVal = node.m_NodesRectBounds[MIN][splitDim]
176        + (node.m_NodesRectBounds[MAX][splitDim] - node.m_NodesRectBounds[MIN][splitDim])
177        * 0.5;
178    // might want to try to slide it further to contain more than one point on
179    // the
180    // side that is resulting empty
181    if (splitVal < node.m_NodeRanges[splitDim][MIN])
182      splitVal = node.m_NodeRanges[splitDim][MIN];
183    else if (splitVal >= node.m_NodeRanges[splitDim][MAX])
184      splitVal = node.m_NodeRanges[splitDim][MAX]
185          - node.m_NodeRanges[splitDim][WIDTH] * 0.001;
186
187    int rightStart = rearrangePoints(m_InstList, node.m_Start, node.m_End,
188        splitDim, splitVal);
189
190    if (rightStart == node.m_Start || rightStart > node.m_End) {
191      if (rightStart == node.m_Start)
192        throw new Exception("Left child is empty in node " + node.m_NodeNumber
193            + ". Not possible with "
194            + "SlidingMidPointofWidestSide splitting method. Please "
195            + "check code.");
196      else
197        throw new Exception("Right child is empty in node " + node.m_NodeNumber
198            + ". Not possible with "
199            + "SlidingMidPointofWidestSide splitting method. Please "
200            + "check code.");
201    }
202
203    node.m_SplitDim = splitDim;
204    node.m_SplitValue = splitVal;
205
206    double[][] widths = new double[2][node.m_NodesRectBounds[0].length];
207
208    System.arraycopy(node.m_NodesRectBounds[MIN], 0, widths[MIN], 0,
209        node.m_NodesRectBounds[MIN].length);
210    System.arraycopy(node.m_NodesRectBounds[MAX], 0, widths[MAX], 0,
211        node.m_NodesRectBounds[MAX].length);
212    widths[MAX][splitDim] = splitVal;
213
214    node.m_Left = new KDTreeNode(numNodesCreated + 1, node.m_Start,
215        rightStart - 1, m_EuclideanDistance.initializeRanges(m_InstList,
216            node.m_Start, rightStart - 1), widths);
217
218    widths = new double[2][node.m_NodesRectBounds[0].length];
219    System.arraycopy(node.m_NodesRectBounds[MIN], 0, widths[MIN], 0,
220        node.m_NodesRectBounds[MIN].length);
221    System.arraycopy(node.m_NodesRectBounds[MAX], 0, widths[MAX], 0,
222        node.m_NodesRectBounds[MAX].length);
223    widths[MIN][splitDim] = splitVal;
224
225    node.m_Right = new KDTreeNode(numNodesCreated + 2, rightStart, node.m_End,
226        m_EuclideanDistance.initializeRanges(m_InstList, rightStart, node.m_End), widths);
227  }
228 
229  /**
230   * Re-arranges the indices array such that the points <= to the splitVal
231   * are on the left of the array and those > the splitVal are on the right.
232   *
233   * @param indices The master index array.
234   * @param startidx The begining index of portion of indices that needs
235   * re-arranging.
236   * @param endidx The end index of portion of indices that needs
237   * re-arranging.
238   * @param splitDim The split dimension/attribute.
239   * @param splitVal The split value.
240   * @return The startIdx of the points > the splitVal (the points
241   * belonging to the right child of the node).
242   */
243  protected int rearrangePoints(int[] indices, final int startidx,
244      final int endidx, final int splitDim, final double splitVal) {
245
246    int tmp, left = startidx - 1;
247    for (int i = startidx; i <= endidx; i++) {
248      if (m_EuclideanDistance.valueIsSmallerEqual(m_Instances
249          .instance(indices[i]), splitDim, splitVal)) {
250        left++;
251        tmp = indices[left];
252        indices[left] = indices[i];
253        indices[i] = tmp;
254      }// end valueIsSmallerEqual
255    }// endfor
256    return left + 1;
257  }
258 
259  /**
260   * Returns the revision string.
261   *
262   * @return            the revision
263   */
264  public String getRevision() {
265    return RevisionUtils.extract("$Revision: 5953 $");
266  }
267}
Note: See TracBrowser for help on using the repository browser.