source: tags/MetisMQIDemo/src/main/java/weka/datagenerators/classifiers/classification/LED24.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 13.3 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * LED24.java
19 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.datagenerators.classifiers.classification;
24
25import weka.core.Attribute;
26import weka.core.FastVector;
27import weka.core.Instance; 
28import weka.core.DenseInstance;
29import weka.core.Instances;
30import weka.core.Option;
31import weka.core.RevisionUtils;
32import weka.core.TechnicalInformation;
33import weka.core.TechnicalInformationHandler;
34import weka.core.Utils;
35import weka.core.TechnicalInformation.Field;
36import weka.core.TechnicalInformation.Type;
37import weka.datagenerators.ClassificationGenerator;
38
39import java.util.Enumeration;
40import java.util.Random;
41import java.util.Vector;
42
43/**
44 <!-- globalinfo-start -->
45 * This generator produces data for a display with 7 LEDs. The original output consists of 10 concepts and 7 boolean attributes. Here, in addition to the 7 necessary boolean attributes, 17 other, irrelevant boolean attributes with random values are added to make it harder. By default 10 percent of noise are added to the data.<br/>
46 * <br/>
47 * More information can be found here:<br/>
48 * L. Breiman J.H. Friedman R.A. Olshen, C.J. Stone (1984). Classification and Regression Trees. Belmont, California. URL http://www.ics.uci.edu/~mlearn/databases/led-display-creator/.
49 * <p/>
50 <!-- globalinfo-end -->
51 *
52 * Link: <br/>
53 * <a href="http://www.ics.uci.edu/~mlearn/databases/led-display-creator/">http://www.ics.uci.edu/~mlearn/databases/led-display-creator/</a> <p/>
54 *
55 <!-- technical-bibtex-start -->
56 * BibTeX:
57 * <pre>
58 * &#64;inbook{Olshen1984,
59 *    address = {Belmont, California},
60 *    author = {L. Breiman J.H. Friedman R.A. Olshen and C.J. Stone},
61 *    pages = {43-49},
62 *    publisher = {Wadsworth International Group},
63 *    title = {Classification and Regression Trees},
64 *    year = {1984},
65 *    ISBN = {0412048418},
66 *    URL = {http://www.ics.uci.edu/\~mlearn/databases/led-display-creator/}
67 * }
68 * </pre>
69 * <p/>
70 <!-- technical-bibtex-end -->
71 *
72 <!-- options-start -->
73 * Valid options are: <p/>
74 *
75 * <pre> -h
76 *  Prints this help.</pre>
77 *
78 * <pre> -o &lt;file&gt;
79 *  The name of the output file, otherwise the generated data is
80 *  printed to stdout.</pre>
81 *
82 * <pre> -r &lt;name&gt;
83 *  The name of the relation.</pre>
84 *
85 * <pre> -d
86 *  Whether to print debug informations.</pre>
87 *
88 * <pre> -S
89 *  The seed for random function (default 1)</pre>
90 *
91 * <pre> -n &lt;num&gt;
92 *  The number of examples to generate (default 100)</pre>
93 *
94 * <pre> -N &lt;num&gt;
95 *  The noise percentage. (default 10.0)</pre>
96 *
97 <!-- options-end -->
98 *
99 * @author Richard Kirkby (rkirkby at cs dot waikato dot ac dot nz)
100 * @author FracPete (fracpete at waikato dot ac dot nz)
101 * @version $Revision: 5987 $
102 */
103
104public class LED24
105  extends ClassificationGenerator
106  implements TechnicalInformationHandler {
107 
108  /** for serialization */
109  static final long serialVersionUID = -7880209100415868737L; 
110 
111  /** the noise rate */
112  protected double m_NoisePercent;
113 
114  /** the 7-bit LEDs */
115  protected static final int m_originalInstances[][] = {
116    { 1, 1, 1, 0, 1, 1, 1 }, { 0, 0, 1, 0, 0, 1, 0 },
117    { 1, 0, 1, 1, 1, 0, 1 }, { 1, 0, 1, 1, 0, 1, 1 },
118    { 0, 1, 1, 1, 0, 1, 0 }, { 1, 1, 0, 1, 0, 1, 1 },
119    { 1, 1, 0, 1, 1, 1, 1 }, { 1, 0, 1, 0, 0, 1, 0 },
120    { 1, 1, 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 0, 1, 1 } };
121
122  /** used for generating the output, i.e., the additional noise attributes */
123  protected int m_numIrrelevantAttributes = 17;
124
125  /**
126   * initializes the generator with default values
127   */
128  public LED24() {
129    super();
130
131    setNoisePercent(defaultNoisePercent());
132  }
133
134  /**
135   * Returns a string describing this data generator.
136   *
137   * @return a description of the data generator suitable for
138   * displaying in the explorer/experimenter gui
139   */
140  public String globalInfo() {
141    return 
142         "This generator produces data for a display with 7 LEDs. The original "
143       + "output consists of 10 concepts and 7 boolean attributes. Here, in "
144       + "addition to the 7 necessary boolean attributes, 17 other, irrelevant "
145       + "boolean attributes with random values are added to make it harder. "
146       + "By default 10 percent of noise are added to the data.\n"
147       + "\n"
148       + "More information can be found here:\n"
149       + getTechnicalInformation().toString();
150  }
151
152  /**
153   * Returns an instance of a TechnicalInformation object, containing
154   * detailed information about the technical background of this class,
155   * e.g., paper reference or book this class is based on.
156   *
157   * @return the technical information about this class
158   */
159  public TechnicalInformation getTechnicalInformation() {
160    TechnicalInformation        result;
161   
162    result = new TechnicalInformation(Type.INBOOK);
163    result.setValue(Field.AUTHOR, "L. Breiman J.H. Friedman R.A. Olshen and C.J. Stone");
164    result.setValue(Field.YEAR, "1984");
165    result.setValue(Field.TITLE, "Classification and Regression Trees");
166    result.setValue(Field.PUBLISHER, "Wadsworth International Group");
167    result.setValue(Field.ADDRESS, "Belmont, California");
168    result.setValue(Field.PAGES, "43-49");
169    result.setValue(Field.ISBN, "0412048418");
170    result.setValue(Field.URL, "http://www.ics.uci.edu/~mlearn/databases/led-display-creator/");
171   
172    return result;
173  }
174
175 /**
176   * Returns an enumeration describing the available options.
177   *
178   * @return an enumeration of all the available options
179   */
180  public Enumeration listOptions() {
181    Vector result = enumToVector(super.listOptions());
182
183    result.add(new Option(
184              "\tThe noise percentage. (default " 
185              + defaultNoisePercent() + ")",
186              "N", 1, "-N <num>"));
187
188    return result.elements();
189  }
190
191  /**
192   * Parses a list of options for this object. <p/>
193   *
194   <!-- options-start -->
195   * Valid options are: <p/>
196   *
197   * <pre> -h
198   *  Prints this help.</pre>
199   *
200   * <pre> -o &lt;file&gt;
201   *  The name of the output file, otherwise the generated data is
202   *  printed to stdout.</pre>
203   *
204   * <pre> -r &lt;name&gt;
205   *  The name of the relation.</pre>
206   *
207   * <pre> -d
208   *  Whether to print debug informations.</pre>
209   *
210   * <pre> -S
211   *  The seed for random function (default 1)</pre>
212   *
213   * <pre> -n &lt;num&gt;
214   *  The number of examples to generate (default 100)</pre>
215   *
216   * <pre> -N &lt;num&gt;
217   *  The noise percentage. (default 10.0)</pre>
218   *
219   <!-- options-end -->
220   *
221   * @param options the list of options as an array of strings
222   * @throws Exception if an option is not supported
223   */
224  public void setOptions(String[] options) throws Exception {
225    String        tmpStr;
226
227    super.setOptions(options);
228
229    tmpStr = Utils.getOption('N', options);
230    if (tmpStr.length() != 0)
231      setNoisePercent(Double.parseDouble(tmpStr));
232    else
233      setNoisePercent(defaultNoisePercent());
234  }
235
236  /**
237   * Gets the current settings of the datagenerator.
238   *
239   * @return an array of strings suitable for passing to setOptions
240   */
241  public String[] getOptions() {
242    Vector        result;
243    String[]      options;
244    int           i;
245   
246    result  = new Vector();
247    options = super.getOptions();
248    for (i = 0; i < options.length; i++)
249      result.add(options[i]);
250   
251    result.add("-N");
252    result.add("" + getNoisePercent());
253   
254    return (String[]) result.toArray(new String[result.size()]);
255  }
256
257  /**
258   * returns the default noise percentage
259   *
260   * @return the default noise percentage
261   */
262  protected double defaultNoisePercent() {
263    return 10;
264  }
265 
266  /**
267   * Gets the noise percentage.
268   *
269   * @return the noise percentage.
270   */
271  public double getNoisePercent() { 
272    return m_NoisePercent; 
273  }
274 
275  /**
276   * Sets the noise percentage.
277   *
278   * @param value the noise percentage.
279   */
280  public void setNoisePercent(double value) { 
281    if ( (value >= 0.0) && (value <= 100.0) )
282      m_NoisePercent = value;
283    else
284      throw new IllegalArgumentException(
285          "Noise percent must be in [0,100] (provided: " + value + ")!");
286  } 
287 
288  /**
289   * Returns the tip text for this property
290   *
291   * @return tip text for this property suitable for
292   *         displaying in the explorer/experimenter gui
293   */
294  public String noisePercentTipText() {
295    return "The noise percent: 0 <= perc <= 100.";
296  }
297
298  /**
299   * Return if single mode is set for the given data generator
300   * mode depends on option setting and or generator type.
301   *
302   * @return single mode flag
303   * @throws Exception if mode is not set yet
304   */
305  public boolean getSingleModeFlag() throws Exception {
306    return true;
307  }
308
309  /**
310   * Initializes the format for the dataset produced.
311   * Must be called before the generateExample or generateExamples
312   * methods are used.
313   * Re-initializes the random number generator with the given seed.
314   *
315   * @return the format for the dataset
316   * @throws Exception if the generating of the format failed
317   * @see  #getSeed()
318   */
319  public Instances defineDataFormat() throws Exception {
320    FastVector      atts;
321    FastVector      attValues;
322    int             i;
323    int             n;
324
325    m_Random = new Random(getSeed());
326
327    // number of examples is the same as given per option
328    setNumExamplesAct(getNumExamples());
329
330    // set up attributes
331    atts = new FastVector();
332   
333    for (n = 1; n <= 24; n++) {
334      attValues = new FastVector();
335      for (i = 0; i < 2; i++)
336        attValues.addElement("" + i);
337      atts.addElement(new Attribute("att" + n, attValues));
338    }
339   
340    attValues = new FastVector();
341    for (i = 0; i < 10; i++)
342      attValues.addElement("" + i);
343    atts.addElement(new Attribute("class", attValues));
344   
345    // dataset
346    m_DatasetFormat = new Instances(getRelationNameToUse(), atts, 0);
347   
348    return m_DatasetFormat;
349  }
350
351  /**
352   * Generates one example of the dataset.
353   *
354   * @return the generated example
355   * @throws Exception if the format of the dataset is not yet defined
356   * @throws Exception if the generator only works with generateExamples
357   * which means in non single mode
358   */
359  public Instance generateExample() throws Exception {
360    Instance      result;
361    double[]      atts;
362    int           i;
363    int           selected;
364    Random        random;
365
366    result = null;
367    random = getRandom();
368
369    if (m_DatasetFormat == null)
370      throw new Exception("Dataset format not defined.");
371
372    atts     = new double[m_DatasetFormat.numAttributes()];
373    selected = random.nextInt(10);
374    for (i = 0; i < 7; i++) {
375      if ((1 + (random.nextInt(100))) <= getNoisePercent())
376        atts[i] = m_originalInstances[selected][i] == 0 ? 1 : 0;
377      else
378        atts[i] = m_originalInstances[selected][i];
379    }
380
381    for (i = 0; i < m_numIrrelevantAttributes; i++)
382      atts[i + 7] = random.nextInt(2);
383
384    atts[atts.length - 1] = selected;
385
386    // create instance
387    result  = new DenseInstance(1.0, atts);
388    result.setDataset(m_DatasetFormat);
389
390    return result;
391  }
392
393  /**
394   * Generates all examples of the dataset. Re-initializes the random number
395   * generator with the given seed, before generating instances.
396   *
397   * @return the generated dataset
398   * @throws Exception if the format of the dataset is not yet defined
399   * @throws Exception if the generator only works with generateExample,
400   * which means in single mode
401   * @see   #getSeed()
402   */
403  public Instances generateExamples() throws Exception {
404    Instances       result;
405    int             i;
406
407    result   = new Instances(m_DatasetFormat, 0);
408    m_Random = new Random(getSeed());
409
410    for (i = 0; i < getNumExamplesAct(); i++)
411      result.add(generateExample());
412   
413    return result;
414  }
415
416  /**
417   * Generates a comment string that documentates the data generator.
418   * By default this string is added at the beginning of the produced output
419   * as ARFF file type, next after the options.
420   *
421   * @return string contains info about the generated rules
422   */
423  public String generateStart () {
424    return "";
425  }
426
427  /**
428   * Generates a comment string that documentats the data generator.
429   * By default this string is added at the end of theproduces output
430   * as ARFF file type.
431   *
432   * @return string contains info about the generated rules
433   * @throws Exception if the generating of the documentaion fails
434   */
435  public String generateFinished() throws Exception {
436    return "";
437  }
438 
439  /**
440   * Returns the revision string.
441   *
442   * @return            the revision
443   */
444  public String getRevision() {
445    return RevisionUtils.extract("$Revision: 5987 $");
446  }
447
448  /**
449   * Main method for executing this class.
450   *
451   * @param args should contain arguments for the data producer:
452   */
453  public static void main(String[] args) {
454    runDataGenerator(new LED24(), args);
455  }
456}
Note: See TracBrowser for help on using the repository browser.