[4] | 1 | /* |
---|
| 2 | * This program is free software; you can redistribute it and/or modify |
---|
| 3 | * it under the terms of the GNU General Public License as published by |
---|
| 4 | * the Free Software Foundation; either version 2 of the License, or |
---|
| 5 | * (at your option) any later version. |
---|
| 6 | * |
---|
| 7 | * This program is distributed in the hope that it will be useful, |
---|
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
| 10 | * GNU General Public License for more details. |
---|
| 11 | * |
---|
| 12 | * You should have received a copy of the GNU General Public License |
---|
| 13 | * along with this program; if not, write to the Free Software |
---|
| 14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
| 15 | */ |
---|
| 16 | |
---|
| 17 | /* |
---|
| 18 | * HyperPipes.java |
---|
| 19 | * Copyright (C) 2002 University of Waikato, Hamilton, New Zealand |
---|
| 20 | * |
---|
| 21 | */ |
---|
| 22 | |
---|
| 23 | package weka.classifiers.misc; |
---|
| 24 | |
---|
| 25 | import weka.classifiers.Classifier; |
---|
| 26 | import weka.classifiers.AbstractClassifier; |
---|
| 27 | import weka.core.Attribute; |
---|
| 28 | import weka.core.Capabilities; |
---|
| 29 | import weka.core.Instance; |
---|
| 30 | import weka.core.Instances; |
---|
| 31 | import weka.core.RevisionHandler; |
---|
| 32 | import weka.core.RevisionUtils; |
---|
| 33 | import weka.core.UnsupportedAttributeTypeException; |
---|
| 34 | import weka.core.Utils; |
---|
| 35 | import weka.core.Capabilities.Capability; |
---|
| 36 | |
---|
| 37 | import java.io.Serializable; |
---|
| 38 | |
---|
| 39 | /** |
---|
| 40 | <!-- globalinfo-start --> |
---|
| 41 | * Class implementing a HyperPipe classifier. For each category a HyperPipe is constructed that contains all points of that category (essentially records the attribute bounds observed for each category). Test instances are classified according to the category that "most contains the instance".<br/> |
---|
| 42 | * Does not handle numeric class, or missing values in test cases. Extremely simple algorithm, but has the advantage of being extremely fast, and works quite well when you have "smegloads" of attributes. |
---|
| 43 | * <p/> |
---|
| 44 | <!-- globalinfo-end --> |
---|
| 45 | * |
---|
| 46 | <!-- options-start --> |
---|
| 47 | * Valid options are: <p/> |
---|
| 48 | * |
---|
| 49 | * <pre> -D |
---|
| 50 | * If set, classifier is run in debug mode and |
---|
| 51 | * may output additional info to the console</pre> |
---|
| 52 | * |
---|
| 53 | <!-- options-end --> |
---|
| 54 | * |
---|
| 55 | * @author Lucio de Souza Coelho (lucio@intelligenesis.net) |
---|
| 56 | * @author Len Trigg (len@reeltwo.com) |
---|
| 57 | * @version $Revision: 5928 $ |
---|
| 58 | */ |
---|
| 59 | public class HyperPipes |
---|
| 60 | extends AbstractClassifier { |
---|
| 61 | |
---|
| 62 | /** for serialization */ |
---|
| 63 | static final long serialVersionUID = -7527596632268975274L; |
---|
| 64 | |
---|
| 65 | /** The index of the class attribute */ |
---|
| 66 | protected int m_ClassIndex; |
---|
| 67 | |
---|
| 68 | /** The structure of the training data */ |
---|
| 69 | protected Instances m_Instances; |
---|
| 70 | |
---|
| 71 | /** Stores the HyperPipe for each class */ |
---|
| 72 | protected HyperPipe [] m_HyperPipes; |
---|
| 73 | |
---|
| 74 | /** a ZeroR model in case no model can be built from the data */ |
---|
| 75 | protected Classifier m_ZeroR; |
---|
| 76 | |
---|
| 77 | /** |
---|
| 78 | * Returns a string describing classifier |
---|
| 79 | * @return a description suitable for |
---|
| 80 | * displaying in the explorer/experimenter gui |
---|
| 81 | */ |
---|
| 82 | public String globalInfo() { |
---|
| 83 | |
---|
| 84 | return "Class implementing a HyperPipe classifier. For each category a " |
---|
| 85 | + "HyperPipe is constructed that contains all points of that category " |
---|
| 86 | + "(essentially records the attribute bounds observed for each category). " |
---|
| 87 | + "Test instances are classified according to the category that \"most " |
---|
| 88 | + "contains the instance\".\n" |
---|
| 89 | + "Does not handle numeric class, or missing values in test cases. Extremely " |
---|
| 90 | + "simple algorithm, but has the advantage of being extremely fast, and " |
---|
| 91 | + "works quite well when you have \"smegloads\" of attributes."; |
---|
| 92 | } |
---|
| 93 | |
---|
| 94 | /** |
---|
| 95 | * Represents an n-dimensional structure that bounds all instances |
---|
| 96 | * passed to it (generally all of a given class value). |
---|
| 97 | */ |
---|
| 98 | class HyperPipe |
---|
| 99 | implements Serializable, RevisionHandler { |
---|
| 100 | |
---|
| 101 | /** for serialization */ |
---|
| 102 | static final long serialVersionUID = 3972254260367902025L; |
---|
| 103 | |
---|
| 104 | /** Contains the numeric bounds of all instances in the HyperPipe */ |
---|
| 105 | protected double [][] m_NumericBounds; |
---|
| 106 | |
---|
| 107 | /** Contains the nominal bounds of all instances in the HyperPipe */ |
---|
| 108 | protected boolean [][] m_NominalBounds; |
---|
| 109 | |
---|
| 110 | /** |
---|
| 111 | * Creates the HyperPipe as the n-dimensional parallel-piped |
---|
| 112 | * with minimum volume containing all the points in |
---|
| 113 | * pointSet. |
---|
| 114 | * |
---|
| 115 | * @param instances all instances belonging to the same class |
---|
| 116 | * @throws Exception if missing values are found |
---|
| 117 | */ |
---|
| 118 | public HyperPipe(Instances instances) throws Exception { |
---|
| 119 | |
---|
| 120 | m_NumericBounds = new double [instances.numAttributes()][]; |
---|
| 121 | m_NominalBounds = new boolean [instances.numAttributes()][]; |
---|
| 122 | |
---|
| 123 | for (int i = 0; i < instances.numAttributes(); i++) { |
---|
| 124 | switch (instances.attribute(i).type()) { |
---|
| 125 | case Attribute.NUMERIC: |
---|
| 126 | m_NumericBounds[i] = new double [2]; |
---|
| 127 | m_NumericBounds[i][0] = Double.POSITIVE_INFINITY; |
---|
| 128 | m_NumericBounds[i][1] = Double.NEGATIVE_INFINITY; |
---|
| 129 | break; |
---|
| 130 | case Attribute.NOMINAL: |
---|
| 131 | m_NominalBounds[i] = new boolean [instances.attribute(i).numValues()]; |
---|
| 132 | break; |
---|
| 133 | default: |
---|
| 134 | throw new UnsupportedAttributeTypeException("Cannot process string attributes!"); |
---|
| 135 | } |
---|
| 136 | } |
---|
| 137 | |
---|
| 138 | for (int i = 0; i < instances.numInstances(); i++) { |
---|
| 139 | addInstance(instances.instance(i)); |
---|
| 140 | } |
---|
| 141 | } |
---|
| 142 | |
---|
| 143 | |
---|
| 144 | /** |
---|
| 145 | * Updates the bounds arrays with a single instance. Missing values |
---|
| 146 | * are ignored (i.e. they don't change the bounds for that attribute) |
---|
| 147 | * |
---|
| 148 | * @param instance the instance |
---|
| 149 | * @throws Exception if any missing values are encountered |
---|
| 150 | */ |
---|
| 151 | public void addInstance(Instance instance) throws Exception { |
---|
| 152 | |
---|
| 153 | for (int j = 0; j < instance.numAttributes(); j++) { |
---|
| 154 | if ((j != m_ClassIndex) && (!instance.isMissing(j))) { |
---|
| 155 | |
---|
| 156 | double current = instance.value(j); |
---|
| 157 | |
---|
| 158 | if (m_NumericBounds[j] != null) { // i.e. a numeric attribute |
---|
| 159 | if (current < m_NumericBounds[j][0]) |
---|
| 160 | m_NumericBounds[j][0] = current; |
---|
| 161 | if (current > m_NumericBounds[j][1]) |
---|
| 162 | m_NumericBounds[j][1] = current; |
---|
| 163 | |
---|
| 164 | } else { // i.e. a nominal attribute |
---|
| 165 | m_NominalBounds[j][(int) current] = true; |
---|
| 166 | } |
---|
| 167 | } |
---|
| 168 | } |
---|
| 169 | } |
---|
| 170 | |
---|
| 171 | |
---|
| 172 | /** |
---|
| 173 | * Returns the fraction of the dimensions of a given instance with |
---|
| 174 | * values lying within the corresponding bounds of the HyperPipe. |
---|
| 175 | * |
---|
| 176 | * @param instance the instance |
---|
| 177 | * @return the fraction of dimensions |
---|
| 178 | * @throws Exception if any missing values are encountered |
---|
| 179 | */ |
---|
| 180 | public double partialContains(Instance instance) throws Exception { |
---|
| 181 | |
---|
| 182 | int count = 0; |
---|
| 183 | for (int i = 0; i < instance.numAttributes(); i++) { |
---|
| 184 | |
---|
| 185 | if (i == m_ClassIndex) { |
---|
| 186 | continue; |
---|
| 187 | } |
---|
| 188 | if (instance.isMissing(i)) { |
---|
| 189 | continue; |
---|
| 190 | } |
---|
| 191 | |
---|
| 192 | double current = instance.value(i); |
---|
| 193 | |
---|
| 194 | if (m_NumericBounds[i] != null) { // i.e. a numeric attribute |
---|
| 195 | if ((current >= m_NumericBounds[i][0]) |
---|
| 196 | && (current <= m_NumericBounds[i][1])) { |
---|
| 197 | count++; |
---|
| 198 | } |
---|
| 199 | } else { // i.e. a nominal attribute |
---|
| 200 | if (m_NominalBounds[i][(int) current]) { |
---|
| 201 | count++; |
---|
| 202 | } |
---|
| 203 | } |
---|
| 204 | } |
---|
| 205 | |
---|
| 206 | return ((double)count) / (instance.numAttributes() - 1); |
---|
| 207 | } |
---|
| 208 | |
---|
| 209 | /** |
---|
| 210 | * Returns the revision string. |
---|
| 211 | * |
---|
| 212 | * @return the revision |
---|
| 213 | */ |
---|
| 214 | public String getRevision() { |
---|
| 215 | return RevisionUtils.extract("$Revision: 5928 $"); |
---|
| 216 | } |
---|
| 217 | } |
---|
| 218 | |
---|
| 219 | |
---|
| 220 | /** |
---|
| 221 | * Returns default capabilities of the classifier. |
---|
| 222 | * |
---|
| 223 | * @return the capabilities of this classifier |
---|
| 224 | */ |
---|
| 225 | public Capabilities getCapabilities() { |
---|
| 226 | Capabilities result = super.getCapabilities(); |
---|
| 227 | result.disableAll(); |
---|
| 228 | |
---|
| 229 | // attributes |
---|
| 230 | result.enable(Capability.NOMINAL_ATTRIBUTES); |
---|
| 231 | result.enable(Capability.NUMERIC_ATTRIBUTES); |
---|
| 232 | result.enable(Capability.DATE_ATTRIBUTES); |
---|
| 233 | result.enable(Capability.MISSING_VALUES); |
---|
| 234 | |
---|
| 235 | // class |
---|
| 236 | result.enable(Capability.NOMINAL_CLASS); |
---|
| 237 | result.enable(Capability.MISSING_CLASS_VALUES); |
---|
| 238 | |
---|
| 239 | // instances |
---|
| 240 | result.setMinimumNumberInstances(0); |
---|
| 241 | |
---|
| 242 | return result; |
---|
| 243 | } |
---|
| 244 | |
---|
| 245 | /** |
---|
| 246 | * Generates the classifier. |
---|
| 247 | * |
---|
| 248 | * @param instances set of instances serving as training data |
---|
| 249 | * @throws Exception if the classifier has not been generated successfully |
---|
| 250 | */ |
---|
| 251 | public void buildClassifier(Instances instances) throws Exception { |
---|
| 252 | |
---|
| 253 | // can classifier handle the data? |
---|
| 254 | getCapabilities().testWithFail(instances); |
---|
| 255 | |
---|
| 256 | // remove instances with missing class |
---|
| 257 | instances = new Instances(instances); |
---|
| 258 | instances.deleteWithMissingClass(); |
---|
| 259 | |
---|
| 260 | // only class? -> build ZeroR model |
---|
| 261 | if (instances.numAttributes() == 1) { |
---|
| 262 | System.err.println( |
---|
| 263 | "Cannot build model (only class attribute present in data!), " |
---|
| 264 | + "using ZeroR model instead!"); |
---|
| 265 | m_ZeroR = new weka.classifiers.rules.ZeroR(); |
---|
| 266 | m_ZeroR.buildClassifier(instances); |
---|
| 267 | return; |
---|
| 268 | } |
---|
| 269 | else { |
---|
| 270 | m_ZeroR = null; |
---|
| 271 | } |
---|
| 272 | |
---|
| 273 | m_ClassIndex = instances.classIndex(); |
---|
| 274 | m_Instances = new Instances(instances, 0); // Copy the structure for ref |
---|
| 275 | |
---|
| 276 | // Create the HyperPipe for each class |
---|
| 277 | m_HyperPipes = new HyperPipe [instances.numClasses()]; |
---|
| 278 | for (int i = 0; i < m_HyperPipes.length; i++) { |
---|
| 279 | m_HyperPipes[i] = new HyperPipe(new Instances(instances, 0)); |
---|
| 280 | } |
---|
| 281 | |
---|
| 282 | // Add the instances |
---|
| 283 | for (int i = 0; i < instances.numInstances(); i++) { |
---|
| 284 | updateClassifier(instances.instance(i)); |
---|
| 285 | } |
---|
| 286 | } |
---|
| 287 | |
---|
| 288 | |
---|
| 289 | /** |
---|
| 290 | * Updates the classifier. |
---|
| 291 | * |
---|
| 292 | * @param instance the instance to be put into the classifier |
---|
| 293 | * @throws Exception if the instance could not be included successfully |
---|
| 294 | */ |
---|
| 295 | public void updateClassifier(Instance instance) throws Exception { |
---|
| 296 | |
---|
| 297 | if (instance.classIsMissing()) { |
---|
| 298 | return; |
---|
| 299 | } |
---|
| 300 | m_HyperPipes[(int) instance.classValue()].addInstance(instance); |
---|
| 301 | } |
---|
| 302 | |
---|
| 303 | |
---|
| 304 | /** |
---|
| 305 | * Classifies the given test instance. |
---|
| 306 | * |
---|
| 307 | * @param instance the instance to be classified |
---|
| 308 | * @return the predicted class for the instance |
---|
| 309 | * @throws Exception if the instance can't be classified |
---|
| 310 | */ |
---|
| 311 | public double [] distributionForInstance(Instance instance) throws Exception { |
---|
| 312 | |
---|
| 313 | // default model? |
---|
| 314 | if (m_ZeroR != null) { |
---|
| 315 | return m_ZeroR.distributionForInstance(instance); |
---|
| 316 | } |
---|
| 317 | |
---|
| 318 | double [] dist = new double[m_HyperPipes.length]; |
---|
| 319 | |
---|
| 320 | for (int j = 0; j < m_HyperPipes.length; j++) { |
---|
| 321 | dist[j] = m_HyperPipes[j].partialContains(instance); |
---|
| 322 | } |
---|
| 323 | |
---|
| 324 | double sum = Utils.sum(dist); |
---|
| 325 | if (sum <= 0) { |
---|
| 326 | for (int j = 0; j < dist.length; j++) { |
---|
| 327 | dist[j] = 1.0 / (double)dist.length; |
---|
| 328 | } |
---|
| 329 | return dist; |
---|
| 330 | } else { |
---|
| 331 | Utils.normalize(dist, sum); |
---|
| 332 | return dist; |
---|
| 333 | } |
---|
| 334 | } |
---|
| 335 | |
---|
| 336 | |
---|
| 337 | /** |
---|
| 338 | * Returns a description of this classifier. |
---|
| 339 | * |
---|
| 340 | * @return a description of this classifier as a string. |
---|
| 341 | */ |
---|
| 342 | public String toString() { |
---|
| 343 | |
---|
| 344 | // only ZeroR model? |
---|
| 345 | if (m_ZeroR != null) { |
---|
| 346 | StringBuffer buf = new StringBuffer(); |
---|
| 347 | buf.append(this.getClass().getName().replaceAll(".*\\.", "") + "\n"); |
---|
| 348 | buf.append(this.getClass().getName().replaceAll(".*\\.", "").replaceAll(".", "=") + "\n\n"); |
---|
| 349 | buf.append("Warning: No model could be built, hence ZeroR model is used:\n\n"); |
---|
| 350 | buf.append(m_ZeroR.toString()); |
---|
| 351 | return buf.toString(); |
---|
| 352 | } |
---|
| 353 | |
---|
| 354 | if (m_HyperPipes == null) { |
---|
| 355 | return ("HyperPipes classifier"); |
---|
| 356 | } |
---|
| 357 | |
---|
| 358 | StringBuffer text = new StringBuffer("HyperPipes classifier\n"); |
---|
| 359 | |
---|
| 360 | /* Perhaps print out the bounds for each HyperPipe. |
---|
| 361 | for (int i = 0; i < m_HyperPipes.length; i++) { |
---|
| 362 | text.append("HyperPipe for class: " |
---|
| 363 | + m_Instances.attribute(m_ClassIndex).value(i) + "\n"); |
---|
| 364 | text.append(m_HyperPipes[i] + "\n\n"); |
---|
| 365 | } |
---|
| 366 | */ |
---|
| 367 | |
---|
| 368 | return text.toString(); |
---|
| 369 | } |
---|
| 370 | |
---|
| 371 | /** |
---|
| 372 | * Returns the revision string. |
---|
| 373 | * |
---|
| 374 | * @return the revision |
---|
| 375 | */ |
---|
| 376 | public String getRevision() { |
---|
| 377 | return RevisionUtils.extract("$Revision: 5928 $"); |
---|
| 378 | } |
---|
| 379 | |
---|
| 380 | /** |
---|
| 381 | * Main method for testing this class. |
---|
| 382 | * |
---|
| 383 | * @param argv should contain command line arguments for evaluation |
---|
| 384 | * (see Evaluation). |
---|
| 385 | */ |
---|
| 386 | public static void main(String [] argv) { |
---|
| 387 | runClassifier(new HyperPipes(), argv); |
---|
| 388 | } |
---|
| 389 | } |
---|