source: src/main/java/weka/core/EditDistance.java @ 6

Last change on this file since 6 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 3.2 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    AbstractStringDistanceFunction.java
19 *    Copyright (C) 2008 Bruno Woltzenlogel Paleo (http://www.logic.at/people/bruno/ ; http://bruno-wp.blogspot.com/)
20 *
21 */
22
23package weka.core;
24
25/**
26 * Computes the Levenshtein edit distance between two strings.
27 *
28 * @author Bruno Woltzenlogel Paleo
29 * @version $Revision: 5987 $
30 */
31public class EditDistance
32    extends AbstractStringDistanceFunction {
33
34  public EditDistance() {
35  }
36
37  public EditDistance(Instances data) {
38    super(data);
39  }
40
41  /**
42   * Calculates the distance (Levenshtein Edit Distance) between two strings
43   *
44   * @param stringA the first string
45   * @param stringB the second string
46   * @return the distance between the two given strings
47   */
48  double stringDistance(String stringA, String stringB) {
49    int lengthA = stringA.length();
50    int lengthB = stringB.length();
51
52    double[][] distanceMatrix = new double[lengthA + 1][lengthB + 1];
53
54    for (int i = 0; i <= lengthA; i++) {
55      distanceMatrix[i][0] = i;
56    }
57
58    for (int j = 1; j <= lengthB; j++) {
59      distanceMatrix[0][j] = j;
60    }
61
62    for (int i = 1; i <= lengthA; i++) {
63      for (int j = 1; j <= lengthB; j++) {
64        if (stringA.charAt(i - 1) == stringB.charAt(j - 1)) {
65          distanceMatrix[i][j] = distanceMatrix[i - 1][j - 1];
66        }
67        else {
68          distanceMatrix[i][j] = 1 + Math.min(distanceMatrix[i - 1][j],
69                                              Math.min(distanceMatrix[i][j - 1],
70                                                       distanceMatrix[i - 1][j - 1]));
71        }
72      }
73    }
74    return distanceMatrix[lengthA][lengthB];
75  }
76
77   
78  /**
79   * Returns a string describing this object.
80   *
81   * @return            a description of the evaluator suitable for
82   *                    displaying in the explorer/experimenter gui
83   */
84  public String globalInfo() {
85    return 
86      "Implementing Levenshtein distance function.\n\n"
87      + "One object defines not one distance but the data model in which "
88      + "the distances between objects of that data model can be computed.\n\n"
89      + "Attention: For efficiency reasons the use of consistency checks "
90      + "(like are the data models of the two instances exactly the same), "
91      + "is low.\n\n"
92      + "For more information, see: http://en.wikipedia.org/wiki/Levenshtein_distance\n\n";
93  } 
94 
95  /**
96   * Returns the revision string.
97   *
98   * @return            the revision
99   */
100  public String getRevision() {
101    return RevisionUtils.extract("$Revision: 5987 $");
102  }
103}
Note: See TracBrowser for help on using the repository browser.