source: src/main/java/weka/classifiers/lazy/IB1.java @ 24

Last change on this file since 24 was 4, checked in by gnappo, 14 years ago

Import of weka.

File size: 10.0 KB
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    IB1.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.lazy;

import weka.classifiers.Classifier;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.UpdateableClassifier;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

import java.util.Enumeration;

/**
 <!-- globalinfo-start -->
 * Nearest-neighbour classifier. Uses normalized Euclidean distance to find the training instance closest to the given test instance, and predicts the same class as this training instance. If multiple instances have the same (smallest) distance to the test instance, the first one found is used.<br/>
 * <br/>
 * For more information, see <br/>
 * <br/>
 * D. Aha, D. Kibler (1991). Instance-based learning algorithms. Machine Learning. 6:37-66.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * &#64;article{Aha1991,
 *    author = {D. Aha and D. Kibler},
 *    journal = {Machine Learning},
 *    pages = {37-66},
 *    title = {Instance-based learning algorithms},
 *    volume = {6},
 *    year = {1991}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 <!-- options-end -->
 *
 * @author Stuart Inglis (singlis@cs.waikato.ac.nz)
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @version $Revision: 5928 $
 */
public class IB1
  extends AbstractClassifier
  implements UpdateableClassifier, TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = -6152184127304895851L;

  /** The training instances used for classification. */
  private Instances m_Train;

  /** The minimum values for numeric attributes. */
  private double [] m_MinArray;

  /** The maximum values for numeric attributes. */
  private double [] m_MaxArray;

  /**
   * Returns a string describing this classifier.
   * @return a description suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {

    return "Nearest-neighbour classifier. Uses normalized Euclidean distance to "
      + "find the training instance closest to the given test instance, and predicts "
      + "the same class as this training instance. If multiple instances have "
      + "the same (smallest) distance to the test instance, the first one found is "
      + "used.\n\n"
      + "For more information, see \n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation        result;

    result = new TechnicalInformation(Type.ARTICLE);
    result.setValue(Field.AUTHOR, "D. Aha and D. Kibler");
    result.setValue(Field.YEAR, "1991");
    result.setValue(Field.TITLE, "Instance-based learning algorithms");
    result.setValue(Field.JOURNAL, "Machine Learning");
    result.setValue(Field.VOLUME, "6");
    result.setValue(Field.PAGES, "37-66");

    return result;
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return      the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    // instances
    result.setMinimumNumberInstances(0);

    return result;
  }

  /**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data
   * @throws Exception if the classifier has not been generated successfully
   */
  public void buildClassifier(Instances instances) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    m_Train = new Instances(instances, 0, instances.numInstances());

    m_MinArray = new double [m_Train.numAttributes()];
    m_MaxArray = new double [m_Train.numAttributes()];
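    // Double.NaN marks attributes whose range has not been observed yet;
    // norm() falls back to 0 for such attributes.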
    for (int i = 0; i < m_Train.numAttributes(); i++) {
      m_MinArray[i] = m_MaxArray[i] = Double.NaN;
    }
    Enumeration enu = m_Train.enumerateInstances();
    while (enu.hasMoreElements()) {
      updateMinMax((Instance) enu.nextElement());
    }
  }

  /**
   * Updates the classifier.
   *
   * @param instance the instance to be put into the classifier
   * @throws Exception if the instance could not be included successfully
   */
  public void updateClassifier(Instance instance) throws Exception {

    if (m_Train.equalHeaders(instance.dataset()) == false) {
      throw new Exception("Incompatible instance types\n" + m_Train.equalHeadersMsg(instance.dataset()));
    }
    if (instance.classIsMissing()) {
      return;
    }
    m_Train.add(instance);
    updateMinMax(instance);
  }
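  // Note on incremental use: buildClassifier must be called once first (an
  // empty dataset that just defines the header is enough, since the minimum
  // number of instances is 0); updateClassifier then appends each labelled
  // instance to m_Train and folds it into the attribute ranges.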

  /**
   * Classifies the given test instance.
   *
   * @param instance the instance to be classified
   * @return the predicted class for the instance
   * @throws Exception if the instance can't be classified
   */
  public double classifyInstance(Instance instance) throws Exception {

    if (m_Train.numInstances() == 0) {
      throw new Exception("No training instances!");
    }

    double distance, minDistance = Double.MAX_VALUE, classValue = 0;
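    // The test instance also updates the stored attribute ranges, so the
    // normalization used below reflects values seen at prediction time too.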
    updateMinMax(instance);
    Enumeration enu = m_Train.enumerateInstances();
    while (enu.hasMoreElements()) {
      Instance trainInstance = (Instance) enu.nextElement();
      if (!trainInstance.classIsMissing()) {
        distance = distance(instance, trainInstance);
        if (distance < minDistance) {
          minDistance = distance;
          classValue = trainInstance.classValue();
        }
      }
    }

    return classValue;
  }

  /**
   * Returns a description of this classifier.
   *
   * @return a description of this classifier as a string.
   */
  public String toString() {

    return ("IB1 classifier");
  }

  /**
   * Calculates the distance between two instances
   *
   * @param first the first instance
   * @param second the second instance
   * @return the distance between the two given instances
   */
  private double distance(Instance first, Instance second) {

    double diff, distance = 0;

    for(int i = 0; i < m_Train.numAttributes(); i++) {
      if (i == m_Train.classIndex()) {
        continue;
      }
      if (m_Train.attribute(i).isNominal()) {

        // If attribute is nominal
        if (first.isMissing(i) || second.isMissing(i) ||
            ((int)first.value(i) != (int)second.value(i))) {
          distance += 1;
        }
      } else {

        // If attribute is numeric
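        // Missing values are treated pessimistically: if both values are
        // missing the difference is taken to be 1, otherwise it is the
        // distance from the observed normalized value to the far end of
        // the [0,1] range.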
        if (first.isMissing(i) || second.isMissing(i)){
          if (first.isMissing(i) && second.isMissing(i)) {
            diff = 1;
          } else {
            if (second.isMissing(i)) {
              diff = norm(first.value(i), i);
            } else {
              diff = norm(second.value(i), i);
            }
            if (diff < 0.5) {
              diff = 1.0 - diff;
            }
          }
        } else {
          diff = norm(first.value(i), i) - norm(second.value(i), i);
        }
        distance += diff * diff;
      }
    }

    return distance;
  }
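  // Note: the value returned above is the squared normalized Euclidean
  // distance; the square root is omitted since only the ordering of distances
  // matters when searching for the nearest neighbour.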

  /**
   * Normalizes a given value of a numeric attribute.
   *
   * @param x the value to be normalized
   * @param i the attribute's index
   * @return the normalized value
   */
  private double norm(double x, int i) {

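    // Attributes that have not been observed yet (min is NaN) or that are
    // constant so far (max == min) cannot be scaled and contribute 0.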
    if (Double.isNaN(m_MinArray[i])
        || Utils.eq(m_MaxArray[i], m_MinArray[i])) {
      return 0;
    } else {
      return (x - m_MinArray[i]) / (m_MaxArray[i] - m_MinArray[i]);
    }
  }

  /**
   * Updates the minimum and maximum values for all the attributes
   * based on a new instance.
   *
   * @param instance the new instance
   */
  private void updateMinMax(Instance instance) {

    for (int j = 0; j < m_Train.numAttributes(); j++) {
      if ((m_Train.attribute(j).isNumeric()) && (!instance.isMissing(j))) {
        if (Double.isNaN(m_MinArray[j])) {
          m_MinArray[j] = instance.value(j);
          m_MaxArray[j] = instance.value(j);
        } else {
          if (instance.value(j) < m_MinArray[j]) {
            m_MinArray[j] = instance.value(j);
          } else {
            if (instance.value(j) > m_MaxArray[j]) {
              m_MaxArray[j] = instance.value(j);
            }
          }
        }
      }
    }
  }

  /**
   * Returns the revision string.
   *
   * @return            the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 5928 $");
  }

  /**
   * Main method for testing this class.
   *
   * @param argv should contain command line arguments for evaluation
   * (see Evaluation).
   */
  public static void main(String [] argv) {
    runClassifier(new IB1(), argv);
  }
}
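
// A minimal usage sketch (illustrative only; "training.arff" is a placeholder
// for any ARFF file with a nominal class attribute in the last column):
//
//   Instances data = new Instances(
//       new java.io.BufferedReader(new java.io.FileReader("training.arff")));
//   data.setClassIndex(data.numAttributes() - 1);
//   IB1 ib1 = new IB1();
//   ib1.buildClassifier(data);
//   double prediction = ib1.classifyInstance(data.instance(0));
//
// Alternatively, the main method above runs the standard Weka evaluation,
// e.g. "java weka.classifiers.lazy.IB1 -t training.arff".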