source: src/main/java/weka/classifiers/misc/HyperPipes.java @ 17

Last change on this file since 17 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 11.5 KB
RevLine 
[4]1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    HyperPipes.java
19 *    Copyright (C) 2002 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.classifiers.misc;
24
25import weka.classifiers.Classifier;
26import weka.classifiers.AbstractClassifier;
27import weka.core.Attribute;
28import weka.core.Capabilities;
29import weka.core.Instance;
30import weka.core.Instances;
31import weka.core.RevisionHandler;
32import weka.core.RevisionUtils;
33import weka.core.UnsupportedAttributeTypeException;
34import weka.core.Utils;
35import weka.core.Capabilities.Capability;
36
37import java.io.Serializable;
38
39/**
40 <!-- globalinfo-start -->
41 * Class implementing a HyperPipe classifier. For each category a HyperPipe is constructed that contains all points of that category (essentially records the attribute bounds observed for each category). Test instances are classified according to the category that "most contains the instance".<br/>
42 * Does not handle numeric class, or missing values in test cases. Extremely simple algorithm, but has the advantage of being extremely fast, and works quite well when you have "smegloads" of attributes.
43 * <p/>
44 <!-- globalinfo-end -->
45 *
46 <!-- options-start -->
47 * Valid options are: <p/>
48 *
49 * <pre> -D
50 *  If set, classifier is run in debug mode and
51 *  may output additional info to the console</pre>
52 *
53 <!-- options-end -->
54 *
55 * @author Lucio de Souza Coelho (lucio@intelligenesis.net)
56 * @author Len Trigg (len@reeltwo.com)
57 * @version $Revision: 5928 $
58 */ 
59public class HyperPipes 
60  extends AbstractClassifier {
61
62  /** for serialization */
63  static final long serialVersionUID = -7527596632268975274L;
64 
65  /** The index of the class attribute */
66  protected int m_ClassIndex;
67
68  /** The structure of the training data */
69  protected Instances m_Instances;
70
71  /** Stores the HyperPipe for each class */
72  protected HyperPipe [] m_HyperPipes;
73
74  /** a ZeroR model in case no model can be built from the data */
75  protected Classifier m_ZeroR;
76   
77  /**
78   * Returns a string describing classifier
79   * @return a description suitable for
80   * displaying in the explorer/experimenter gui
81   */
82  public String globalInfo() {
83
84    return "Class implementing a HyperPipe classifier. For each category a "
85    +  "HyperPipe is constructed that contains all points of that category "
86      + "(essentially records the attribute bounds observed for each category). "
87      + "Test instances are classified according to the category that \"most "
88      + "contains the instance\".\n" 
89      + "Does not handle numeric class, or missing values in test cases. Extremely "
90      + "simple algorithm, but has the advantage of being extremely fast, and "
91      + "works quite well when you have \"smegloads\" of attributes.";
92  }
93
94  /**
95   * Represents an n-dimensional structure that bounds all instances
96   * passed to it (generally all of a given class value).
97   */
98  class HyperPipe 
99    implements Serializable, RevisionHandler {
100   
101    /** for serialization */
102    static final long serialVersionUID = 3972254260367902025L;
103
104    /** Contains the numeric bounds of all instances in the HyperPipe */
105    protected double [][] m_NumericBounds;
106
107    /** Contains the nominal bounds of all instances in the HyperPipe */
108    protected boolean [][] m_NominalBounds;
109
110    /**
111     * Creates the HyperPipe as the n-dimensional parallel-piped
112     * with minimum volume containing all the points in
113     * pointSet.
114     *
115     * @param instances all instances belonging to the same class
116     * @throws Exception if missing values are found
117     */
118    public HyperPipe(Instances instances) throws Exception {
119     
120      m_NumericBounds = new double [instances.numAttributes()][];
121      m_NominalBounds = new boolean [instances.numAttributes()][];
122
123      for (int i = 0; i < instances.numAttributes(); i++) {
124        switch (instances.attribute(i).type()) {
125        case Attribute.NUMERIC:
126          m_NumericBounds[i] = new double [2];
127          m_NumericBounds[i][0] = Double.POSITIVE_INFINITY;
128          m_NumericBounds[i][1] = Double.NEGATIVE_INFINITY;
129          break;
130        case Attribute.NOMINAL:
131          m_NominalBounds[i] = new boolean [instances.attribute(i).numValues()];
132          break;
133        default:
134          throw new UnsupportedAttributeTypeException("Cannot process string attributes!");
135        }
136      }
137
138      for (int i = 0; i < instances.numInstances(); i++) {
139        addInstance(instances.instance(i));
140      }
141    }
142
143
144    /**
145     * Updates the bounds arrays with a single instance. Missing values
146     * are ignored (i.e. they don't change the bounds for that attribute)
147     *
148     * @param instance the instance
149     * @throws Exception if any missing values are encountered
150     */
151    public void addInstance(Instance instance) throws Exception {
152
153      for (int j = 0; j < instance.numAttributes(); j++) {
154        if ((j != m_ClassIndex) && (!instance.isMissing(j))) {
155
156          double current = instance.value(j);
157
158          if (m_NumericBounds[j] != null) { // i.e. a numeric attribute
159            if (current < m_NumericBounds[j][0])
160              m_NumericBounds[j][0] = current;
161            if (current > m_NumericBounds[j][1])
162              m_NumericBounds[j][1] = current;
163
164          } else { // i.e. a nominal attribute
165            m_NominalBounds[j][(int) current] = true;
166          }
167        }
168      }
169    }
170
171
172    /**
173     * Returns the fraction of the dimensions of a given instance with
174     * values lying within the corresponding bounds of the HyperPipe.
175     *
176     * @param instance the instance
177     * @return the fraction of dimensions
178     * @throws Exception if any missing values are encountered
179     */
180    public double partialContains(Instance instance) throws Exception {
181     
182      int count = 0;
183      for (int i = 0; i < instance.numAttributes(); i++) {
184
185        if (i == m_ClassIndex) {
186          continue;
187        }
188        if (instance.isMissing(i)) {
189          continue;
190        }
191
192        double current = instance.value(i);
193
194        if (m_NumericBounds[i] != null) { // i.e. a numeric attribute
195          if ((current >= m_NumericBounds[i][0]) 
196              && (current <= m_NumericBounds[i][1])) {
197            count++;
198          }
199        } else { // i.e. a nominal attribute
200          if (m_NominalBounds[i][(int) current]) {
201            count++;
202          }
203        }
204      }
205
206      return ((double)count) / (instance.numAttributes() - 1);
207    }
208   
209    /**
210     * Returns the revision string.
211     *
212     * @return          the revision
213     */
214    public String getRevision() {
215      return RevisionUtils.extract("$Revision: 5928 $");
216    }
217  }
218
219
220  /**
221   * Returns default capabilities of the classifier.
222   *
223   * @return      the capabilities of this classifier
224   */
225  public Capabilities getCapabilities() {
226    Capabilities result = super.getCapabilities();
227    result.disableAll();
228
229    // attributes
230    result.enable(Capability.NOMINAL_ATTRIBUTES);
231    result.enable(Capability.NUMERIC_ATTRIBUTES);
232    result.enable(Capability.DATE_ATTRIBUTES);
233    result.enable(Capability.MISSING_VALUES);
234
235    // class
236    result.enable(Capability.NOMINAL_CLASS);
237    result.enable(Capability.MISSING_CLASS_VALUES);
238
239    // instances
240    result.setMinimumNumberInstances(0);
241   
242    return result;
243  }
244
245  /**
246   * Generates the classifier.
247   *
248   * @param instances set of instances serving as training data
249   * @throws Exception if the classifier has not been generated successfully
250   */
251  public void buildClassifier(Instances instances) throws Exception {
252   
253    // can classifier handle the data?
254    getCapabilities().testWithFail(instances);
255
256    // remove instances with missing class
257    instances = new Instances(instances);
258    instances.deleteWithMissingClass();
259   
260    // only class? -> build ZeroR model
261    if (instances.numAttributes() == 1) {
262      System.err.println(
263          "Cannot build model (only class attribute present in data!), "
264          + "using ZeroR model instead!");
265      m_ZeroR = new weka.classifiers.rules.ZeroR();
266      m_ZeroR.buildClassifier(instances);
267      return;
268    }
269    else {
270      m_ZeroR = null;
271    }
272   
273    m_ClassIndex = instances.classIndex();
274    m_Instances = new Instances(instances, 0); // Copy the structure for ref
275
276    // Create the HyperPipe for each class
277    m_HyperPipes = new HyperPipe [instances.numClasses()];
278    for (int i = 0; i < m_HyperPipes.length; i++) {
279      m_HyperPipes[i] = new HyperPipe(new Instances(instances, 0));
280    }
281
282    // Add the instances
283    for (int i = 0; i < instances.numInstances(); i++) {
284      updateClassifier(instances.instance(i));
285    }
286  }
287
288
289  /**
290   * Updates the classifier.
291   *
292   * @param instance the instance to be put into the classifier
293   * @throws Exception if the instance could not be included successfully
294   */
295  public void updateClassifier(Instance instance) throws Exception {
296 
297    if (instance.classIsMissing()) {
298      return;
299    }
300    m_HyperPipes[(int) instance.classValue()].addInstance(instance);
301  }
302
303
304  /**
305   * Classifies the given test instance.
306   *
307   * @param instance the instance to be classified
308   * @return the predicted class for the instance
309   * @throws Exception if the instance can't be classified
310   */
311  public double [] distributionForInstance(Instance instance) throws Exception {
312       
313    // default model?
314    if (m_ZeroR != null) {
315      return m_ZeroR.distributionForInstance(instance);
316    }
317   
318    double [] dist = new double[m_HyperPipes.length];
319
320    for (int j = 0; j < m_HyperPipes.length; j++) {
321      dist[j] = m_HyperPipes[j].partialContains(instance);
322    }
323
324    double sum = Utils.sum(dist);
325    if (sum <= 0) {
326      for (int j = 0; j < dist.length; j++) {
327        dist[j] = 1.0 / (double)dist.length;
328      }
329      return dist;
330    } else {
331      Utils.normalize(dist, sum);
332      return dist;
333    }
334  }
335
336
337  /**
338   * Returns a description of this classifier.
339   *
340   * @return a description of this classifier as a string.
341   */
342  public String toString() {
343
344    // only ZeroR model?
345    if (m_ZeroR != null) {
346      StringBuffer buf = new StringBuffer();
347      buf.append(this.getClass().getName().replaceAll(".*\\.", "") + "\n");
348      buf.append(this.getClass().getName().replaceAll(".*\\.", "").replaceAll(".", "=") + "\n\n");
349      buf.append("Warning: No model could be built, hence ZeroR model is used:\n\n");
350      buf.append(m_ZeroR.toString());
351      return buf.toString();
352    }
353   
354    if (m_HyperPipes == null) {
355      return ("HyperPipes classifier");
356    }
357
358    StringBuffer text = new StringBuffer("HyperPipes classifier\n");
359
360    /* Perhaps print out the bounds for each HyperPipe.
361    for (int i = 0; i < m_HyperPipes.length; i++) {
362      text.append("HyperPipe for class: "
363                  + m_Instances.attribute(m_ClassIndex).value(i) + "\n");
364      text.append(m_HyperPipes[i] + "\n\n");
365    }
366    */
367
368    return text.toString();
369  }
370 
371  /**
372   * Returns the revision string.
373   *
374   * @return            the revision
375   */
376  public String getRevision() {
377    return RevisionUtils.extract("$Revision: 5928 $");
378  }
379
380  /**
381   * Main method for testing this class.
382   *
383   * @param argv should contain command line arguments for evaluation
384   * (see Evaluation).
385   */
386  public static void main(String [] argv) {
387    runClassifier(new HyperPipes(), argv);
388  }
389}
Note: See TracBrowser for help on using the repository browser.