source: src/main/java/weka/core/EuclideanDistance.java @ 6

Last change on this file since 6 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 9.0 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    EuclideanDistance.java
19 *    Copyright (C) 1999-2007 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.core;
24
25import weka.core.TechnicalInformation.Field;
26import weka.core.TechnicalInformation.Type;
27import weka.core.neighboursearch.PerformanceStats;
28
29/**
30 <!-- globalinfo-start -->
31 * Implementing Euclidean distance (or similarity) function.<br/>
32 * <br/>
33 * One object defines not one distance but the data model in which the distances between objects of that data model can be computed.<br/>
34 * <br/>
35 * Attention: For efficiency reasons the use of consistency checks (like are the data models of the two instances exactly the same), is low.<br/>
36 * <br/>
37 * For more information, see:<br/>
38 * <br/>
39 * Wikipedia. Euclidean distance. URL http://en.wikipedia.org/wiki/Euclidean_distance.
40 * <p/>
41 <!-- globalinfo-end -->
42 *
43 <!-- technical-bibtex-start -->
44 * BibTeX:
45 * <pre>
46 * &#64;misc{missing_id,
47 *    author = {Wikipedia},
48 *    title = {Euclidean distance},
49 *    URL = {http://en.wikipedia.org/wiki/Euclidean_distance}
50 * }
51 * </pre>
52 * <p/>
53 <!-- technical-bibtex-end -->
54 *
55 <!-- options-start -->
56 * Valid options are: <p/>
57 *
58 * <pre> -D
59 *  Turns off the normalization of attribute
60 *  values in distance calculation.</pre>
61 *
62 * <pre> -R &lt;col1,col2-col4,...&gt;
63 *  Specifies list of columns to used in the calculation of the
64 *  distance. 'first' and 'last' are valid indices.
65 *  (default: first-last)</pre>
66 *
67 * <pre> -V
68 *  Invert matching sense of column indices.</pre>
69 *
70 <!-- options-end -->
71 *
72 * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
73 * @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz)
74 * @author FracPete (fracpete at waikato dot ac dot nz)
75 * @version $Revision: 5953 $
76 */
77public class EuclideanDistance
78  extends NormalizableDistance
79  implements Cloneable, TechnicalInformationHandler {
80
81  /** for serialization. */
82  private static final long serialVersionUID = 1068606253458807903L;
83
84  /**
85   * Constructs an Euclidean Distance object, Instances must be still set.
86   */
87  public EuclideanDistance() {
88    super();
89  }
90
91  /**
92   * Constructs an Euclidean Distance object and automatically initializes the
93   * ranges.
94   *
95   * @param data        the instances the distance function should work on
96   */
97  public EuclideanDistance(Instances data) {
98    super(data);
99  }
100
101  /**
102   * Returns a string describing this object.
103   *
104   * @return            a description of the evaluator suitable for
105   *                    displaying in the explorer/experimenter gui
106   */
107  public String globalInfo() {
108    return 
109        "Implementing Euclidean distance (or similarity) function.\n\n"
110      + "One object defines not one distance but the data model in which "
111      + "the distances between objects of that data model can be computed.\n\n"
112      + "Attention: For efficiency reasons the use of consistency checks "
113      + "(like are the data models of the two instances exactly the same), "
114      + "is low.\n\n"
115      + "For more information, see:\n\n"
116      + getTechnicalInformation().toString();
117  }
118
119  /**
120   * Returns an instance of a TechnicalInformation object, containing
121   * detailed information about the technical background of this class,
122   * e.g., paper reference or book this class is based on.
123   *
124   * @return            the technical information about this class
125   */
126  public TechnicalInformation getTechnicalInformation() {
127    TechnicalInformation        result;
128   
129    result = new TechnicalInformation(Type.MISC);
130    result.setValue(Field.AUTHOR, "Wikipedia");
131    result.setValue(Field.TITLE, "Euclidean distance");
132    result.setValue(Field.URL, "http://en.wikipedia.org/wiki/Euclidean_distance");
133
134    return result;
135  }
136 
137  /**
138   * Calculates the distance between two instances.
139   *
140   * @param first       the first instance
141   * @param second      the second instance
142   * @return            the distance between the two given instances
143   */
144  public double distance(Instance first, Instance second) {
145    return Math.sqrt(distance(first, second, Double.POSITIVE_INFINITY));
146  }
147 
148  /**
149   * Calculates the distance (or similarity) between two instances. Need to
150   * pass this returned distance later on to postprocess method to set it on
151   * correct scale. <br/>
152   * P.S.: Please don't mix the use of this function with
153   * distance(Instance first, Instance second), as that already does post
154   * processing. Please consider passing Double.POSITIVE_INFINITY as the cutOffValue to
155   * this function and then later on do the post processing on all the
156   * distances.
157   *
158   * @param first       the first instance
159   * @param second      the second instance
160   * @param stats       the structure for storing performance statistics.
161   * @return            the distance between the two given instances or
162   *                    Double.POSITIVE_INFINITY.
163   */
164  public double distance(Instance first, Instance second, PerformanceStats stats) { //debug method pls remove after use
165    return Math.sqrt(distance(first, second, Double.POSITIVE_INFINITY, stats));
166  }
167 
168  /**
169   * Updates the current distance calculated so far with the new difference
170   * between two attributes. The difference between the attributes was
171   * calculated with the difference(int,double,double) method.
172   *
173   * @param currDist    the current distance calculated so far
174   * @param diff        the difference between two new attributes
175   * @return            the update distance
176   * @see               #difference(int, double, double)
177   */
178  protected double updateDistance(double currDist, double diff) {
179    double      result;
180   
181    result  = currDist;
182    result += diff * diff;
183   
184    return result;
185  }
186 
187  /**
188   * Does post processing of the distances (if necessary) returned by
189   * distance(distance(Instance first, Instance second, double cutOffValue). It
190   * is necessary to do so to get the correct distances if
191   * distance(distance(Instance first, Instance second, double cutOffValue) is
192   * used. This is because that function actually returns the squared distance
193   * to avoid inaccuracies arising from floating point comparison.
194   *
195   * @param distances   the distances to post-process
196   */
197  public void postProcessDistances(double distances[]) {
198    for(int i = 0; i < distances.length; i++) {
199      distances[i] = Math.sqrt(distances[i]);
200    }
201  }
202 
203  /**
204   * Returns the squared difference of two values of an attribute.
205   *
206   * @param index       the attribute index
207   * @param val1        the first value
208   * @param val2        the second value
209   * @return            the squared difference
210   */
211  public double sqDifference(int index, double val1, double val2) {
212    double val = difference(index, val1, val2);
213    return val*val;
214  }
215 
216  /**
217   * Returns value in the middle of the two parameter values.
218   *
219   * @param ranges      the ranges to this dimension
220   * @return            the middle value
221   */
222  public double getMiddle(double[] ranges) {
223
224    double middle = ranges[R_MIN] + ranges[R_WIDTH] * 0.5;
225    return middle;
226  }
227 
228  /**
229   * Returns the index of the closest point to the current instance.
230   * Index is index in Instances object that is the second parameter.
231   *
232   * @param instance    the instance to assign a cluster to
233   * @param allPoints   all points
234   * @param pointList   the list of points
235   * @return            the index of the closest point
236   * @throws Exception  if something goes wrong
237   */
238  public int closestPoint(Instance instance, Instances allPoints,
239                          int[] pointList) throws Exception {
240    double minDist = Integer.MAX_VALUE;
241    int bestPoint = 0;
242    for (int i = 0; i < pointList.length; i++) {
243      double dist = distance(instance, allPoints.instance(pointList[i]), Double.POSITIVE_INFINITY);
244      if (dist < minDist) {
245        minDist = dist;
246        bestPoint = i;
247      }
248    }
249    return pointList[bestPoint];
250  }
251 
252  /**
253   * Returns true if the value of the given dimension is smaller or equal the
254   * value to be compared with.
255   *
256   * @param instance    the instance where the value should be taken of
257   * @param dim         the dimension of the value
258   * @param value       the value to compare with
259   * @return            true if value of instance is smaller or equal value
260   */
261  public boolean valueIsSmallerEqual(Instance instance, int dim,
262                                     double value) {  //This stays
263    return instance.value(dim) <= value;
264  }
265 
266  /**
267   * Returns the revision string.
268   *
269   * @return            the revision
270   */
271  public String getRevision() {
272    return RevisionUtils.extract("$Revision: 5953 $");
273  }
274}
Note: See TracBrowser for help on using the repository browser.