[4] | 1 | /* |
---|
| 2 | * This program is free software; you can redistribute it and/or modify |
---|
| 3 | * it under the terms of the GNU General Public License as published by |
---|
| 4 | * the Free Software Foundation; either version 2 of the License, or |
---|
| 5 | * (at your option) any later version. |
---|
| 6 | * |
---|
| 7 | * This program is distributed in the hope that it will be useful, |
---|
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
| 10 | * GNU General Public License for more details. |
---|
| 11 | * |
---|
| 12 | * You should have received a copy of the GNU General Public License |
---|
| 13 | * along with this program; if not, write to the Free Software |
---|
| 14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
| 15 | */ |
---|
| 16 | |
---|
| 17 | /* |
---|
| 18 | * IB1.java |
---|
| 19 | * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand |
---|
| 20 | * |
---|
| 21 | */ |
---|
| 22 | |
---|
| 23 | package weka.classifiers.lazy; |
---|
| 24 | |
---|
| 25 | import weka.classifiers.Classifier; |
---|
| 26 | import weka.classifiers.AbstractClassifier; |
---|
| 27 | import weka.classifiers.UpdateableClassifier; |
---|
| 28 | import weka.core.Capabilities; |
---|
| 29 | import weka.core.Instance; |
---|
| 30 | import weka.core.Instances; |
---|
| 31 | import weka.core.RevisionUtils; |
---|
| 32 | import weka.core.TechnicalInformation; |
---|
| 33 | import weka.core.TechnicalInformationHandler; |
---|
| 34 | import weka.core.Utils; |
---|
| 35 | import weka.core.Capabilities.Capability; |
---|
| 36 | import weka.core.TechnicalInformation.Field; |
---|
| 37 | import weka.core.TechnicalInformation.Type; |
---|
| 38 | |
---|
| 39 | import java.util.Enumeration; |
---|
| 40 | |
---|
| 41 | /** |
---|
| 42 | <!-- globalinfo-start --> |
---|
| 43 | * Nearest-neighbour classifier. Uses normalized Euclidean distance to find the training instance closest to the given test instance, and predicts the same class as this training instance. If multiple instances have the same (smallest) distance to the test instance, the first one found is used.<br/> |
---|
| 44 | * <br/> |
---|
| 45 | * For more information, see <br/> |
---|
| 46 | * <br/> |
---|
| 47 | * D. Aha, D. Kibler (1991). Instance-based learning algorithms. Machine Learning. 6:37-66. |
---|
| 48 | * <p/> |
---|
| 49 | <!-- globalinfo-end --> |
---|
| 50 | * |
---|
| 51 | <!-- technical-bibtex-start --> |
---|
| 52 | * BibTeX: |
---|
| 53 | * <pre> |
---|
| 54 | * @article{Aha1991, |
---|
| 55 | * author = {D. Aha and D. Kibler}, |
---|
| 56 | * journal = {Machine Learning}, |
---|
| 57 | * pages = {37-66}, |
---|
| 58 | * title = {Instance-based learning algorithms}, |
---|
| 59 | * volume = {6}, |
---|
| 60 | * year = {1991} |
---|
| 61 | * } |
---|
| 62 | * </pre> |
---|
| 63 | * <p/> |
---|
| 64 | <!-- technical-bibtex-end --> |
---|
| 65 | * |
---|
| 66 | <!-- options-start --> |
---|
| 67 | * Valid options are: <p/> |
---|
| 68 | * |
---|
| 69 | * <pre> -D |
---|
| 70 | * If set, classifier is run in debug mode and |
---|
| 71 | * may output additional info to the console</pre> |
---|
| 72 | * |
---|
| 73 | <!-- options-end --> |
---|
| 74 | * |
---|
| 75 | * @author Stuart Inglis (singlis@cs.waikato.ac.nz) |
---|
| 76 | * @author Len Trigg (trigg@cs.waikato.ac.nz) |
---|
| 77 | * @author Eibe Frank (eibe@cs.waikato.ac.nz) |
---|
| 78 | * @version $Revision: 5928 $ |
---|
| 79 | */ |
---|
| 80 | public class IB1 |
---|
| 81 | extends AbstractClassifier |
---|
| 82 | implements UpdateableClassifier, TechnicalInformationHandler { |
---|
| 83 | |
---|
| 84 | /** for serialization */ |
---|
| 85 | static final long serialVersionUID = -6152184127304895851L; |
---|
| 86 | |
---|
| 87 | /** The training instances used for classification. */ |
---|
| 88 | private Instances m_Train; |
---|
| 89 | |
---|
| 90 | /** The minimum values for numeric attributes. */ |
---|
| 91 | private double [] m_MinArray; |
---|
| 92 | |
---|
| 93 | /** The maximum values for numeric attributes. */ |
---|
| 94 | private double [] m_MaxArray; |
---|
| 95 | |
---|
| 96 | /** |
---|
| 97 | * Returns a string describing classifier |
---|
| 98 | * @return a description suitable for |
---|
| 99 | * displaying in the explorer/experimenter gui |
---|
| 100 | */ |
---|
| 101 | public String globalInfo() { |
---|
| 102 | |
---|
| 103 | return "Nearest-neighbour classifier. Uses normalized Euclidean distance to " |
---|
| 104 | + "find the training instance closest to the given test instance, and predicts " |
---|
| 105 | + "the same class as this training instance. If multiple instances have " |
---|
| 106 | + "the same (smallest) distance to the test instance, the first one found is " |
---|
| 107 | + "used.\n\n" |
---|
| 108 | + "For more information, see \n\n" |
---|
| 109 | + getTechnicalInformation().toString(); |
---|
| 110 | } |
---|
| 111 | |
---|
| 112 | /** |
---|
| 113 | * Returns an instance of a TechnicalInformation object, containing |
---|
| 114 | * detailed information about the technical background of this class, |
---|
| 115 | * e.g., paper reference or book this class is based on. |
---|
| 116 | * |
---|
| 117 | * @return the technical information about this class |
---|
| 118 | */ |
---|
| 119 | public TechnicalInformation getTechnicalInformation() { |
---|
| 120 | TechnicalInformation result; |
---|
| 121 | |
---|
| 122 | result = new TechnicalInformation(Type.ARTICLE); |
---|
| 123 | result.setValue(Field.AUTHOR, "D. Aha and D. Kibler"); |
---|
| 124 | result.setValue(Field.YEAR, "1991"); |
---|
| 125 | result.setValue(Field.TITLE, "Instance-based learning algorithms"); |
---|
| 126 | result.setValue(Field.JOURNAL, "Machine Learning"); |
---|
| 127 | result.setValue(Field.VOLUME, "6"); |
---|
| 128 | result.setValue(Field.PAGES, "37-66"); |
---|
| 129 | |
---|
| 130 | return result; |
---|
| 131 | } |
---|
| 132 | |
---|
| 133 | /** |
---|
| 134 | * Returns default capabilities of the classifier. |
---|
| 135 | * |
---|
| 136 | * @return the capabilities of this classifier |
---|
| 137 | */ |
---|
| 138 | public Capabilities getCapabilities() { |
---|
| 139 | Capabilities result = super.getCapabilities(); |
---|
| 140 | result.disableAll(); |
---|
| 141 | |
---|
| 142 | // attributes |
---|
| 143 | result.enable(Capability.NOMINAL_ATTRIBUTES); |
---|
| 144 | result.enable(Capability.NUMERIC_ATTRIBUTES); |
---|
| 145 | result.enable(Capability.DATE_ATTRIBUTES); |
---|
| 146 | result.enable(Capability.MISSING_VALUES); |
---|
| 147 | |
---|
| 148 | // class |
---|
| 149 | result.enable(Capability.NOMINAL_CLASS); |
---|
| 150 | result.enable(Capability.MISSING_CLASS_VALUES); |
---|
| 151 | |
---|
| 152 | // instances |
---|
| 153 | result.setMinimumNumberInstances(0); |
---|
| 154 | |
---|
| 155 | return result; |
---|
| 156 | } |
---|
| 157 | |
---|
| 158 | /** |
---|
| 159 | * Generates the classifier. |
---|
| 160 | * |
---|
| 161 | * @param instances set of instances serving as training data |
---|
| 162 | * @throws Exception if the classifier has not been generated successfully |
---|
| 163 | */ |
---|
| 164 | public void buildClassifier(Instances instances) throws Exception { |
---|
| 165 | |
---|
| 166 | // can classifier handle the data? |
---|
| 167 | getCapabilities().testWithFail(instances); |
---|
| 168 | |
---|
| 169 | // remove instances with missing class |
---|
| 170 | instances = new Instances(instances); |
---|
| 171 | instances.deleteWithMissingClass(); |
---|
| 172 | |
---|
| 173 | m_Train = new Instances(instances, 0, instances.numInstances()); |
---|
| 174 | |
---|
| 175 | m_MinArray = new double [m_Train.numAttributes()]; |
---|
| 176 | m_MaxArray = new double [m_Train.numAttributes()]; |
---|
| 177 | for (int i = 0; i < m_Train.numAttributes(); i++) { |
---|
| 178 | m_MinArray[i] = m_MaxArray[i] = Double.NaN; |
---|
| 179 | } |
---|
| 180 | Enumeration enu = m_Train.enumerateInstances(); |
---|
| 181 | while (enu.hasMoreElements()) { |
---|
| 182 | updateMinMax((Instance) enu.nextElement()); |
---|
| 183 | } |
---|
| 184 | } |
---|
| 185 | |
---|
| 186 | /** |
---|
| 187 | * Updates the classifier. |
---|
| 188 | * |
---|
| 189 | * @param instance the instance to be put into the classifier |
---|
| 190 | * @throws Exception if the instance could not be included successfully |
---|
| 191 | */ |
---|
| 192 | public void updateClassifier(Instance instance) throws Exception { |
---|
| 193 | |
---|
| 194 | if (m_Train.equalHeaders(instance.dataset()) == false) { |
---|
| 195 | throw new Exception("Incompatible instance types\n" + m_Train.equalHeadersMsg(instance.dataset())); |
---|
| 196 | } |
---|
| 197 | if (instance.classIsMissing()) { |
---|
| 198 | return; |
---|
| 199 | } |
---|
| 200 | m_Train.add(instance); |
---|
| 201 | updateMinMax(instance); |
---|
| 202 | } |
---|
| 203 | |
---|
| 204 | /** |
---|
| 205 | * Classifies the given test instance. |
---|
| 206 | * |
---|
| 207 | * @param instance the instance to be classified |
---|
| 208 | * @return the predicted class for the instance |
---|
| 209 | * @throws Exception if the instance can't be classified |
---|
| 210 | */ |
---|
| 211 | public double classifyInstance(Instance instance) throws Exception { |
---|
| 212 | |
---|
| 213 | if (m_Train.numInstances() == 0) { |
---|
| 214 | throw new Exception("No training instances!"); |
---|
| 215 | } |
---|
| 216 | |
---|
| 217 | double distance, minDistance = Double.MAX_VALUE, classValue = 0; |
---|
| 218 | updateMinMax(instance); |
---|
| 219 | Enumeration enu = m_Train.enumerateInstances(); |
---|
| 220 | while (enu.hasMoreElements()) { |
---|
| 221 | Instance trainInstance = (Instance) enu.nextElement(); |
---|
| 222 | if (!trainInstance.classIsMissing()) { |
---|
| 223 | distance = distance(instance, trainInstance); |
---|
| 224 | if (distance < minDistance) { |
---|
| 225 | minDistance = distance; |
---|
| 226 | classValue = trainInstance.classValue(); |
---|
| 227 | } |
---|
| 228 | } |
---|
| 229 | } |
---|
| 230 | |
---|
| 231 | return classValue; |
---|
| 232 | } |
---|
| 233 | |
---|
| 234 | /** |
---|
| 235 | * Returns a description of this classifier. |
---|
| 236 | * |
---|
| 237 | * @return a description of this classifier as a string. |
---|
| 238 | */ |
---|
| 239 | public String toString() { |
---|
| 240 | |
---|
| 241 | return ("IB1 classifier"); |
---|
| 242 | } |
---|
| 243 | |
---|
| 244 | /** |
---|
| 245 | * Calculates the distance between two instances |
---|
| 246 | * |
---|
| 247 | * @param first the first instance |
---|
| 248 | * @param second the second instance |
---|
| 249 | * @return the distance between the two given instances |
---|
| 250 | */ |
---|
| 251 | private double distance(Instance first, Instance second) { |
---|
| 252 | |
---|
| 253 | double diff, distance = 0; |
---|
| 254 | |
---|
| 255 | for(int i = 0; i < m_Train.numAttributes(); i++) { |
---|
| 256 | if (i == m_Train.classIndex()) { |
---|
| 257 | continue; |
---|
| 258 | } |
---|
| 259 | if (m_Train.attribute(i).isNominal()) { |
---|
| 260 | |
---|
| 261 | // If attribute is nominal |
---|
| 262 | if (first.isMissing(i) || second.isMissing(i) || |
---|
| 263 | ((int)first.value(i) != (int)second.value(i))) { |
---|
| 264 | distance += 1; |
---|
| 265 | } |
---|
| 266 | } else { |
---|
| 267 | |
---|
| 268 | // If attribute is numeric |
---|
| 269 | if (first.isMissing(i) || second.isMissing(i)){ |
---|
| 270 | if (first.isMissing(i) && second.isMissing(i)) { |
---|
| 271 | diff = 1; |
---|
| 272 | } else { |
---|
| 273 | if (second.isMissing(i)) { |
---|
| 274 | diff = norm(first.value(i), i); |
---|
| 275 | } else { |
---|
| 276 | diff = norm(second.value(i), i); |
---|
| 277 | } |
---|
| 278 | if (diff < 0.5) { |
---|
| 279 | diff = 1.0 - diff; |
---|
| 280 | } |
---|
| 281 | } |
---|
| 282 | } else { |
---|
| 283 | diff = norm(first.value(i), i) - norm(second.value(i), i); |
---|
| 284 | } |
---|
| 285 | distance += diff * diff; |
---|
| 286 | } |
---|
| 287 | } |
---|
| 288 | |
---|
| 289 | return distance; |
---|
| 290 | } |
---|
| 291 | |
---|
| 292 | /** |
---|
| 293 | * Normalizes a given value of a numeric attribute. |
---|
| 294 | * |
---|
| 295 | * @param x the value to be normalized |
---|
| 296 | * @param i the attribute's index |
---|
| 297 | * @return the normalized value |
---|
| 298 | */ |
---|
| 299 | private double norm(double x,int i) { |
---|
| 300 | |
---|
| 301 | if (Double.isNaN(m_MinArray[i]) |
---|
| 302 | || Utils.eq(m_MaxArray[i], m_MinArray[i])) { |
---|
| 303 | return 0; |
---|
| 304 | } else { |
---|
| 305 | return (x - m_MinArray[i]) / (m_MaxArray[i] - m_MinArray[i]); |
---|
| 306 | } |
---|
| 307 | } |
---|
| 308 | |
---|
| 309 | /** |
---|
| 310 | * Updates the minimum and maximum values for all the attributes |
---|
| 311 | * based on a new instance. |
---|
| 312 | * |
---|
| 313 | * @param instance the new instance |
---|
| 314 | */ |
---|
| 315 | private void updateMinMax(Instance instance) { |
---|
| 316 | |
---|
| 317 | for (int j = 0;j < m_Train.numAttributes(); j++) { |
---|
| 318 | if ((m_Train.attribute(j).isNumeric()) && (!instance.isMissing(j))) { |
---|
| 319 | if (Double.isNaN(m_MinArray[j])) { |
---|
| 320 | m_MinArray[j] = instance.value(j); |
---|
| 321 | m_MaxArray[j] = instance.value(j); |
---|
| 322 | } else { |
---|
| 323 | if (instance.value(j) < m_MinArray[j]) { |
---|
| 324 | m_MinArray[j] = instance.value(j); |
---|
| 325 | } else { |
---|
| 326 | if (instance.value(j) > m_MaxArray[j]) { |
---|
| 327 | m_MaxArray[j] = instance.value(j); |
---|
| 328 | } |
---|
| 329 | } |
---|
| 330 | } |
---|
| 331 | } |
---|
| 332 | } |
---|
| 333 | } |
---|
| 334 | |
---|
| 335 | /** |
---|
| 336 | * Returns the revision string. |
---|
| 337 | * |
---|
| 338 | * @return the revision |
---|
| 339 | */ |
---|
| 340 | public String getRevision() { |
---|
| 341 | return RevisionUtils.extract("$Revision: 5928 $"); |
---|
| 342 | } |
---|
| 343 | |
---|
| 344 | /** |
---|
| 345 | * Main method for testing this class. |
---|
| 346 | * |
---|
| 347 | * @param argv should contain command line arguments for evaluation |
---|
| 348 | * (see Evaluation). |
---|
| 349 | */ |
---|
| 350 | public static void main(String [] argv) { |
---|
| 351 | runClassifier(new IB1(), argv); |
---|
| 352 | } |
---|
| 353 | } |
---|