source: src/main/java/weka/core/AbstractStringDistanceFunction.java @ 6

Last change on this file since 6 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 4.6 KB
Line 
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
16
/*
 *    AbstractStringDistanceFunction.java
 *    Copyright (C) 2008 Bruno Woltzenlogel Paleo (http://www.logic.at/people/bruno/ ; http://bruno-wp.blogspot.com/)
 *
 */
22
23package weka.core;
24
25import weka.core.neighboursearch.PerformanceStats;
26
27/**
28 * Represents the abstract ancestor for string-based distance functions, like
29 * EditDistance.
30 *
31 * @author Bruno Woltzenlogel Paleo
32 * @version $Revision: 5987 $
33 */
34public abstract class AbstractStringDistanceFunction
35    extends NormalizableDistance {
36 
37  /**
38   * Constructor that doesn't set the data
39   */
40  public AbstractStringDistanceFunction() {
41    super();
42  }
43
44  /**
45   * Constructor that sets the data
46   *
47   * @param data the set of instances that will be used for
48   * later distance comparisons
49   */
50  public AbstractStringDistanceFunction(Instances data) {
51    super(data);
52  }
53
54   
55  /**
56   * Updates the current distance calculated so far with the new difference
57   * between two attributes. The difference between the attributes was
58   * calculated with the difference(int,double,double) method.
59   *
60   * @param currDist    the current distance calculated so far
61   * @param diff        the difference between two new attributes
62   * @return            the update distance
63   * @see               #difference(int, double, double)
64   */
65  protected double updateDistance(double currDist, double diff) {
66    return (currDist + (diff * diff));
67  }
68
69  /**
70   * Computes the difference between two given attribute
71   * values.
72   *
73   * @param index       the attribute index
74   * @param val1        the first value
75   * @param val2        the second value
76   * @return            the difference
77   */
78  protected double difference(int index, String string1, String string2) {
79    switch (m_Data.attribute(index).type()) {
80    case Attribute.STRING:
81      double diff = stringDistance(string1, string2);
82      if (m_DontNormalize == true) {
83        return diff;
84      }
85      else {
86        if (string1.length() > string2.length()) {
87          return diff/((double) string1.length()); 
88        }
89        else {
90          return diff/((double) string2.length());   
91        }
92      }
93
94    default:
95      return 0;
96    }
97  }
98 
99  /**
100   * Calculates the distance between two instances. Offers speed up (if the
101   * distance function class in use supports it) in nearest neighbour search by
102   * taking into account the cutOff or maximum distance. Depending on the
103   * distance function class, post processing of the distances by
104   * postProcessDistances(double []) may be required if this function is used.
105   *
106   * @param first       the first instance
107   * @param second      the second instance
108   * @param cutOffValue If the distance being calculated becomes larger than
109   *                    cutOffValue then the rest of the calculation is
110   *                    discarded.
111   * @param stats       the performance stats object
112   * @return            the distance between the two given instances or
113   *                    Double.POSITIVE_INFINITY if the distance being
114   *                    calculated becomes larger than cutOffValue.
115   */
116  @Override
117    public double distance(Instance first, Instance second, double cutOffValue, PerformanceStats stats) {
118    double sqDistance = 0;
119    int numAttributes = m_Data.numAttributes();
120   
121    validate();
122   
123    double diff;
124   
125    for (int i = 0; i < numAttributes; i++) {
126      diff = 0;
127      if (m_ActiveIndices[i]) {
128        diff = difference(i, first.stringValue(i), second.stringValue(i));
129      }
130      sqDistance = updateDistance(sqDistance, diff);
131      if (sqDistance > (cutOffValue * cutOffValue)) return Double.POSITIVE_INFINITY;
132    } 
133    double distance = Math.sqrt(sqDistance);
134    return distance;
135  }
136 
137  /**
138   * Calculates the distance between two strings.
139   * Must be implemented by any non-abstract StringDistance class
140   *
141   * @param stringA the first string
142   * @param stringB the second string
143   * @return the distance between the two given strings
144   */
145  abstract double stringDistance(String stringA, String stringB);
146
147}
Note: See TracBrowser for help on using the repository browser.