| 1 | /* |
|---|
| 2 | * This program is free software; you can redistribute it and/or modify |
|---|
| 3 | * it under the terms of the GNU General Public License as published by |
|---|
| 4 | * the Free Software Foundation; either version 2 of the License, or |
|---|
| 5 | * (at your option) any later version. |
|---|
| 6 | * |
|---|
| 7 | * This program is distributed in the hope that it will be useful, |
|---|
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 10 | * GNU General Public License for more details. |
|---|
| 11 | * |
|---|
| 12 | * You should have received a copy of the GNU General Public License |
|---|
| 13 | * along with this program; if not, write to the Free Software |
|---|
| 14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
|---|
| 15 | */ |
|---|
| 16 | |
|---|
| 17 | /* |
|---|
| 18 | * MedianOfWidestDimension.java |
|---|
| 19 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand |
|---|
| 20 | */ |
|---|
| 21 | |
|---|
| 22 | package weka.core.neighboursearch.kdtrees; |
|---|
| 23 | |
|---|
| 24 | import weka.core.RevisionUtils; |
|---|
| 25 | import weka.core.TechnicalInformation; |
|---|
| 26 | import weka.core.TechnicalInformationHandler; |
|---|
| 27 | import weka.core.TechnicalInformation.Field; |
|---|
| 28 | import weka.core.TechnicalInformation.Type; |
|---|
| 29 | |
|---|
| 30 | /** |
|---|
| 31 | <!-- globalinfo-start --> |
|---|
| 32 | * The class that splits a KDTree node based on the median value of a dimension in which the node's points have the widest spread.<br/> |
|---|
| 33 | * <br/> |
|---|
| 34 | * For more information see also:<br/> |
|---|
| 35 | * <br/> |
|---|
| 36 | * Jerome H. Friedman, Jon Luis Bentley, Raphael Ari Finkel (1977). An Algorithm for Finding Best Matches in Logarithmic Expected Time. ACM Transactions on Mathematics Software. 3(3):209-226. |
|---|
| 37 | * <p/> |
|---|
| 38 | <!-- globalinfo-end --> |
|---|
| 39 | * |
|---|
| 40 | <!-- technical-bibtex-start --> |
|---|
| 41 | * BibTeX: |
|---|
| 42 | * <pre> |
|---|
| 43 | * @article{Friedman1977, |
|---|
| 44 | * author = {Jerome H. Friedman and Jon Luis Bentley and Raphael Ari Finkel}, |
|---|
| 45 | * journal = {ACM Transactions on Mathematics Software}, |
|---|
| 46 | * month = {September}, |
|---|
| 47 | * number = {3}, |
|---|
| 48 | * pages = {209-226}, |
|---|
| 49 | * title = {An Algorithm for Finding Best Matches in Logarithmic Expected Time}, |
|---|
| 50 | * volume = {3}, |
|---|
| 51 | * year = {1977} |
|---|
| 52 | * } |
|---|
| 53 | * </pre> |
|---|
| 54 | * <p/> |
|---|
| 55 | <!-- technical-bibtex-end --> |
|---|
| 56 | * |
|---|
| 57 | <!-- options-start --> |
|---|
| 58 | <!-- options-end --> |
|---|
| 59 | * |
|---|
| 60 | * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) |
|---|
| 61 | * @version $Revision: 5953 $ |
|---|
| 62 | */ |
|---|
| 63 | public class MedianOfWidestDimension |
|---|
| 64 | extends KDTreeNodeSplitter |
|---|
| 65 | implements TechnicalInformationHandler { |
|---|
| 66 | |
|---|
| 67 | /** for serialization. */ |
|---|
| 68 | private static final long serialVersionUID = 1383443320160540663L; |
|---|
| 69 | |
|---|
| 70 | /** |
|---|
| 71 | * Returns a string describing this nearest neighbour search algorithm. |
|---|
| 72 | * |
|---|
| 73 | * @return a description of the algorithm for displaying in the |
|---|
| 74 | * explorer/experimenter gui |
|---|
| 75 | */ |
|---|
| 76 | public String globalInfo() { |
|---|
| 77 | return |
|---|
| 78 | "The class that splits a KDTree node based on the median value of " |
|---|
| 79 | + "a dimension in which the node's points have the widest spread.\n\n" |
|---|
| 80 | + "For more information see also:\n\n" |
|---|
| 81 | + getTechnicalInformation().toString(); |
|---|
| 82 | } |
|---|
| 83 | |
|---|
| 84 | /** |
|---|
| 85 | * Returns an instance of a TechnicalInformation object, containing detailed |
|---|
| 86 | * information about the technical background of this class, e.g., paper |
|---|
| 87 | * reference or book this class is based on. |
|---|
| 88 | * |
|---|
| 89 | * @return the technical information about this class |
|---|
| 90 | */ |
|---|
| 91 | public TechnicalInformation getTechnicalInformation() { |
|---|
| 92 | TechnicalInformation result; |
|---|
| 93 | |
|---|
| 94 | result = new TechnicalInformation(Type.ARTICLE); |
|---|
| 95 | result.setValue(Field.AUTHOR, "Jerome H. Friedman and Jon Luis Bentley and Raphael Ari Finkel"); |
|---|
| 96 | result.setValue(Field.YEAR, "1977"); |
|---|
| 97 | result.setValue(Field.TITLE, "An Algorithm for Finding Best Matches in Logarithmic Expected Time"); |
|---|
| 98 | result.setValue(Field.JOURNAL, "ACM Transactions on Mathematics Software"); |
|---|
| 99 | result.setValue(Field.PAGES, "209-226"); |
|---|
| 100 | result.setValue(Field.MONTH, "September"); |
|---|
| 101 | result.setValue(Field.VOLUME, "3"); |
|---|
| 102 | result.setValue(Field.NUMBER, "3"); |
|---|
| 103 | |
|---|
| 104 | return result; |
|---|
| 105 | } |
|---|
| 106 | |
|---|
| 107 | /** |
|---|
| 108 | * Splits a node into two based on the median value of the dimension |
|---|
| 109 | * in which the points have the widest spread. After splitting two |
|---|
| 110 | * new nodes are created and correctly initialised. And, node.left |
|---|
| 111 | * and node.right are set appropriately. |
|---|
| 112 | * |
|---|
| 113 | * @param node The node to split. |
|---|
| 114 | * @param numNodesCreated The number of nodes that so far have been |
|---|
| 115 | * created for the tree, so that the newly created nodes are |
|---|
| 116 | * assigned correct/meaningful node numbers/ids. |
|---|
| 117 | * @param nodeRanges The attributes' range for the points inside |
|---|
| 118 | * the node that is to be split. |
|---|
| 119 | * @param universe The attributes' range for the whole |
|---|
| 120 | * point-space. |
|---|
| 121 | * @throws Exception If there is some problem in splitting the |
|---|
| 122 | * given node. |
|---|
| 123 | */ |
|---|
| 124 | public void splitNode(KDTreeNode node, int numNodesCreated, |
|---|
| 125 | double[][] nodeRanges, double[][] universe) throws Exception { |
|---|
| 126 | |
|---|
| 127 | correctlyInitialized(); |
|---|
| 128 | |
|---|
| 129 | int splitDim = widestDim(nodeRanges, universe); |
|---|
| 130 | |
|---|
| 131 | //In this case median is defined to be either the middle value (in case of |
|---|
| 132 | //odd number of values) or the left of the two middle values (in case of |
|---|
| 133 | //even number of values). |
|---|
| 134 | int medianIdxIdx = node.m_Start + (node.m_End-node.m_Start)/2; |
|---|
| 135 | //the following finds the median and also re-arranges the array so all |
|---|
| 136 | //elements to the left are < median and those to the right are > median. |
|---|
| 137 | int medianIdx = select(splitDim, m_InstList, node.m_Start, node.m_End, (node.m_End-node.m_Start)/2+1); |
|---|
| 138 | |
|---|
| 139 | |
|---|
| 140 | node.m_SplitDim = splitDim; |
|---|
| 141 | node.m_SplitValue = m_Instances.instance(m_InstList[medianIdx]).value(splitDim); |
|---|
| 142 | |
|---|
| 143 | node.m_Left = new KDTreeNode(numNodesCreated+1, node.m_Start, medianIdxIdx, |
|---|
| 144 | m_EuclideanDistance.initializeRanges(m_InstList, node.m_Start, medianIdxIdx)); |
|---|
| 145 | node.m_Right = new KDTreeNode(numNodesCreated+2, medianIdxIdx+1, node.m_End, |
|---|
| 146 | m_EuclideanDistance.initializeRanges(m_InstList, medianIdxIdx+1, node.m_End)); |
|---|
| 147 | } |
|---|
| 148 | |
|---|
| 149 | /** |
|---|
| 150 | * Partitions the instances around a pivot. Used by quicksort and |
|---|
| 151 | * kthSmallestValue. |
|---|
| 152 | * |
|---|
| 153 | * @param attIdx The attribution/dimension based on which the |
|---|
| 154 | * instances should be partitioned. |
|---|
| 155 | * @param index The master index array containing indices of the |
|---|
| 156 | * instances. |
|---|
| 157 | * @param l The begining index of the portion of master index |
|---|
| 158 | * array that should be partitioned. |
|---|
| 159 | * @param r The end index of the portion of master index array |
|---|
| 160 | * that should be partitioned. |
|---|
| 161 | * @return the index of the middle element |
|---|
| 162 | */ |
|---|
| 163 | protected int partition(int attIdx, int[] index, int l, int r) { |
|---|
| 164 | |
|---|
| 165 | double pivot = m_Instances.instance(index[(l + r) / 2]).value(attIdx); |
|---|
| 166 | int help; |
|---|
| 167 | |
|---|
| 168 | while (l < r) { |
|---|
| 169 | while ((m_Instances.instance(index[l]).value(attIdx) < pivot) && (l < r)) { |
|---|
| 170 | l++; |
|---|
| 171 | } |
|---|
| 172 | while ((m_Instances.instance(index[r]).value(attIdx) > pivot) && (l < r)) { |
|---|
| 173 | r--; |
|---|
| 174 | } |
|---|
| 175 | if (l < r) { |
|---|
| 176 | help = index[l]; |
|---|
| 177 | index[l] = index[r]; |
|---|
| 178 | index[r] = help; |
|---|
| 179 | l++; |
|---|
| 180 | r--; |
|---|
| 181 | } |
|---|
| 182 | } |
|---|
| 183 | if ((l == r) && (m_Instances.instance(index[r]).value(attIdx) > pivot)) { |
|---|
| 184 | r--; |
|---|
| 185 | } |
|---|
| 186 | |
|---|
| 187 | return r; |
|---|
| 188 | } |
|---|
| 189 | |
|---|
| 190 | /** |
|---|
| 191 | * Implements computation of the kth-smallest element according |
|---|
| 192 | * to Manber's "Introduction to Algorithms". |
|---|
| 193 | * |
|---|
| 194 | * @param attIdx The dimension/attribute of the instances in |
|---|
| 195 | * which to find the kth-smallest element. |
|---|
| 196 | * @param indices The master index array containing indices of |
|---|
| 197 | * the instances. |
|---|
| 198 | * @param left The begining index of the portion of the master |
|---|
| 199 | * index array in which to find the kth-smallest element. |
|---|
| 200 | * @param right The end index of the portion of the master index |
|---|
| 201 | * array in which to find the kth-smallest element. |
|---|
| 202 | * @param k The value of k |
|---|
| 203 | * @return The index of the kth-smallest element |
|---|
| 204 | */ |
|---|
| 205 | public int select(int attIdx, int[] indices, int left, int right, int k) { |
|---|
| 206 | |
|---|
| 207 | if (left == right) { |
|---|
| 208 | return left; |
|---|
| 209 | } else { |
|---|
| 210 | int middle = partition(attIdx, indices, left, right); |
|---|
| 211 | if ((middle - left + 1) >= k) { |
|---|
| 212 | return select(attIdx, indices, left, middle, k); |
|---|
| 213 | } else { |
|---|
| 214 | return select(attIdx, indices, middle + 1, right, k - (middle - left + 1)); |
|---|
| 215 | } |
|---|
| 216 | } |
|---|
| 217 | } |
|---|
| 218 | |
|---|
| 219 | /** |
|---|
| 220 | * Returns the revision string. |
|---|
| 221 | * |
|---|
| 222 | * @return the revision |
|---|
| 223 | */ |
|---|
| 224 | public String getRevision() { |
|---|
| 225 | return RevisionUtils.extract("$Revision: 5953 $"); |
|---|
| 226 | } |
|---|
| 227 | } |
|---|