source: tags/MetisMQIDemo/src/main/java/weka/datagenerators/classifiers/classification/RandomRBF.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 16.9 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * RandomRBF.java
19 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.datagenerators.classifiers.classification;
24
25import weka.core.Attribute;
26import weka.core.FastVector;
27import weka.core.Instance; 
28import weka.core.DenseInstance;
29import weka.core.Instances;
30import weka.core.Option;
31import weka.core.RevisionUtils;
32import weka.core.Utils;
33import weka.datagenerators.ClassificationGenerator;
34
35import java.util.Enumeration;
36import java.util.Random;
37import java.util.Vector;
38
39/**
40 <!-- globalinfo-start -->
41 * RandomRBF data is generated by first creating a random set of centers for each class. Each center is randomly assigned a weight, a central point per attribute, and a standard deviation. To generate new instances, a center is chosen at random taking the weights of each center into consideration. Attribute values are randomly generated and offset from the center, where the overall vector has been scaled so that its length equals a value sampled randomly from the Gaussian distribution of the center. The particular center chosen determines the class of the instance.<br/>
42 *  RandomRBF data contains only numeric attributes as it is non-trivial to include nominal values.
43 * <p/>
44 <!-- globalinfo-end -->
45 *
46 <!-- options-start -->
47 * Valid options are: <p/>
48 *
49 * <pre> -h
50 *  Prints this help.</pre>
51 *
52 * <pre> -o &lt;file&gt;
53 *  The name of the output file, otherwise the generated data is
54 *  printed to stdout.</pre>
55 *
56 * <pre> -r &lt;name&gt;
57 *  The name of the relation.</pre>
58 *
59 * <pre> -d
60 *  Whether to print debug informations.</pre>
61 *
62 * <pre> -S
63 *  The seed for random function (default 1)</pre>
64 *
65 * <pre> -n &lt;num&gt;
66 *  The number of examples to generate (default 100)</pre>
67 *
68 * <pre> -a &lt;num&gt;
69 *  The number of attributes (default 10).</pre>
70 *
71 * <pre> -c &lt;num&gt;
72 *  The number of classes (default 2)</pre>
73 *
74 * <pre> -C &lt;num&gt;
75 *  The number of centroids to use. (default 50)</pre>
76 *
77 <!-- options-end -->
78 *
79 * @author Richard Kirkby (rkirkby at cs dot waikato dot ac dot nz)
80 * @author FracPete (fracpete at waikato dot ac dot nz)
81 * @version $Revision: 5987 $
82 */
83
84public class RandomRBF
85  extends ClassificationGenerator {
86
87  /** for serialization */
88  static final long serialVersionUID = 6069033710635728720L; 
89 
90  /** Number of attribute the dataset should have */
91  protected int m_NumAttributes;
92
93  /** Number of Classes the dataset should have */
94  protected int m_NumClasses;
95
96  /** the number of centroids to use for generation */
97  protected int m_NumCentroids;
98 
99  /** the centroids */
100  protected double[][] m_centroids;
101 
102  /** the classes of the centroids */
103  protected int[] m_centroidClasses;
104 
105  /** the weights of the centroids */
106  protected double[] m_centroidWeights;
107 
108  /** the stddevs of the centroids */
109  protected double[] m_centroidStdDevs;
110
111  /**
112   * initializes the generator with default values
113   */
114  public RandomRBF() {
115    super();
116
117    setNumAttributes(defaultNumAttributes());
118    setNumClasses(defaultNumClasses());
119    setNumCentroids(defaultNumCentroids());
120  }
121
122  /**
123   * Returns a string describing this data generator.
124   *
125   * @return a description of the data generator suitable for
126   * displaying in the explorer/experimenter gui
127   */
128  public String globalInfo() {
129    return 
130        "RandomRBF data is generated by first creating a random set of "
131      + "centers for each class. Each center is randomly assigned a weight, "
132      + "a central point per attribute, and a standard deviation. To "
133      + "generate new instances, a center is chosen at random taking the "
134      + "weights of each center into consideration. Attribute values are "
135      + "randomly generated and offset from the center, where the overall "
136      + "vector has been scaled so that its length equals a value sampled "
137      + "randomly from the Gaussian distribution of the center. The "
138      + "particular center chosen determines the class of the instance.\n "
139      + "RandomRBF data contains only numeric attributes as it is "
140      + "non-trivial to include nominal values.";
141  }
142
143 /**
144   * Returns an enumeration describing the available options.
145   *
146   * @return an enumeration of all the available options
147   */
148  public Enumeration listOptions() {
149    Vector result = enumToVector(super.listOptions());
150
151    result.addElement(new Option(
152          "\tThe number of attributes (default " 
153          + defaultNumAttributes() + ").",
154          "a", 1, "-a <num>"));
155
156    result.addElement(new Option(
157        "\tThe number of classes (default " + defaultNumClasses() + ")",
158        "c", 1, "-c <num>"));
159
160    result.add(new Option(
161              "\tThe number of centroids to use. (default " 
162              + defaultNumCentroids() + ")",
163              "C", 1, "-C <num>"));
164
165    return result.elements();
166  }
167
168  /**
169   * Parses a list of options for this object. <p/>
170   *
171   <!-- options-start -->
172   * Valid options are: <p/>
173   *
174   * <pre> -h
175   *  Prints this help.</pre>
176   *
177   * <pre> -o &lt;file&gt;
178   *  The name of the output file, otherwise the generated data is
179   *  printed to stdout.</pre>
180   *
181   * <pre> -r &lt;name&gt;
182   *  The name of the relation.</pre>
183   *
184   * <pre> -d
185   *  Whether to print debug informations.</pre>
186   *
187   * <pre> -S
188   *  The seed for random function (default 1)</pre>
189   *
190   * <pre> -n &lt;num&gt;
191   *  The number of examples to generate (default 100)</pre>
192   *
193   * <pre> -a &lt;num&gt;
194   *  The number of attributes (default 10).</pre>
195   *
196   * <pre> -c &lt;num&gt;
197   *  The number of classes (default 2)</pre>
198   *
199   * <pre> -C &lt;num&gt;
200   *  The number of centroids to use. (default 50)</pre>
201   *
202   <!-- options-end -->
203   *
204   * @param options the list of options as an array of strings
205   * @throws Exception if an option is not supported
206   */
207  public void setOptions(String[] options) throws Exception {
208    String        tmpStr;
209
210    super.setOptions(options);
211
212    tmpStr = Utils.getOption('a', options);
213    if (tmpStr.length() != 0)
214      setNumAttributes(Integer.parseInt(tmpStr));
215    else
216      setNumAttributes(defaultNumAttributes());
217
218    tmpStr = Utils.getOption('c', options);
219    if (tmpStr.length() != 0)
220      setNumClasses(Integer.parseInt(tmpStr));
221    else
222      setNumClasses(defaultNumClasses());
223   
224    tmpStr = Utils.getOption('C', options);
225    if (tmpStr.length() != 0)
226      setNumCentroids(Integer.parseInt(tmpStr));
227    else
228      setNumCentroids(defaultNumCentroids());
229  }
230
231  /**
232   * Gets the current settings of the datagenerator.
233   *
234   * @return an array of strings suitable for passing to setOptions
235   */
236  public String[] getOptions() {
237    Vector        result;
238    String[]      options;
239    int           i;
240   
241    result  = new Vector();
242    options = super.getOptions();
243    for (i = 0; i < options.length; i++)
244      result.add(options[i]);
245   
246    result.add("-a");
247    result.add("" + getNumAttributes());
248
249    result.add("-c");
250    result.add("" + getNumClasses());
251
252    result.add("-C");
253    result.add("" + getNumCentroids());
254   
255    return (String[]) result.toArray(new String[result.size()]);
256  }
257
258  /**
259   * returns the default number of attributes
260   *
261   * @return the default number of attributes
262   */
263  protected int defaultNumAttributes() {
264    return 10;
265  }
266
267  /**
268   * Sets the number of attributes the dataset should have.
269   * @param numAttributes the new number of attributes
270   */
271  public void setNumAttributes(int numAttributes) {
272    m_NumAttributes = numAttributes;
273  }
274
275  /**
276   * Gets the number of attributes that should be produced.
277   * @return the number of attributes that should be produced
278   */
279  public int getNumAttributes() { 
280    return m_NumAttributes; 
281  }
282 
283  /**
284   * Returns the tip text for this property
285   *
286   * @return tip text for this property suitable for
287   *         displaying in the explorer/experimenter gui
288   */
289  public String numAttributesTipText() {
290    return "The number of attributes the generated data will contain.";
291  }
292
293  /**
294   * returns the default number of classes
295   *
296   * @return the default number of classes
297   */
298  protected int defaultNumClasses() {
299    return 2;
300  }
301
302  /**
303   * Sets the number of classes the dataset should have.
304   * @param numClasses the new number of classes
305   */
306  public void setNumClasses(int numClasses) { 
307    m_NumClasses = numClasses; 
308  }
309
310  /**
311   * Gets the number of classes the dataset should have.
312   * @return the number of classes the dataset should have
313   */
314  public int getNumClasses() { 
315    return m_NumClasses; 
316  }
317 
318  /**
319   * Returns the tip text for this property
320   *
321   * @return tip text for this property suitable for
322   *         displaying in the explorer/experimenter gui
323   */
324  public String numClassesTipText() {
325    return "The number of classes to generate.";
326  }
327
328  /**
329   * returns the default number of centroids
330   *
331   * @return the default number of centroids
332   */
333  protected int defaultNumCentroids() {
334    return 50;
335  }
336 
337  /**
338   * Gets the number of centroids.
339   *
340   * @return the number of centroids.
341   */
342  public int getNumCentroids() { 
343    return m_NumCentroids; 
344  }
345 
346  /**
347   * Sets the number of centroids to use.
348   *
349   * @param value the number of centroids to use.
350   */
351  public void setNumCentroids(int value) { 
352    if (value > 0)
353      m_NumCentroids = value; 
354    else
355      System.out.println("At least 1 centroid is necessary (provided: " 
356          + value + ")!");
357  } 
358 
359  /**
360   * Returns the tip text for this property
361   *
362   * @return tip text for this property suitable for
363   *         displaying in the explorer/experimenter gui
364   */
365  public String numCentroidsTipText() {
366    return "The number of centroids to use.";
367  }
368
369  /**
370   * Return if single mode is set for the given data generator
371   * mode depends on option setting and or generator type.
372   *
373   * @return single mode flag
374   * @throws Exception if mode is not set yet
375   */
376  public boolean getSingleModeFlag() throws Exception {
377    return true;
378  }
379
380  /**
381   * returns a random index based on the given proportions
382   *
383   * @param proportionArray     the proportions
384   * @param random              the random number generator to use
385   * @return the random index
386   */
387  protected int chooseRandomIndexBasedOnProportions(
388      double[] proportionArray, Random random) {
389
390    double      probSum;
391    double      val;
392    int         index;
393    double      sum;
394
395    probSum = Utils.sum(proportionArray);
396    val     = random.nextDouble() * probSum;
397    index   = 0;
398    sum     = 0.0;
399   
400    while ((sum <= val) && (index < proportionArray.length))
401      sum += proportionArray[index++];
402   
403    return index - 1;
404  }
405
406  /**
407   * Initializes the format for the dataset produced.
408   * Must be called before the generateExample or generateExamples
409   * methods are used.
410   * Re-initializes the random number generator with the given seed.
411   *
412   * @return the format for the dataset
413   * @throws Exception if the generating of the format failed
414   * @see  #getSeed()
415   */
416  public Instances defineDataFormat() throws Exception {
417    int             i;
418    int             j;
419    FastVector      atts;
420    FastVector      clsValues;
421    Random          rand;
422
423    m_Random = new Random(getSeed());
424    rand     = getRandom();
425
426    // number of examples is the same as given per option
427    setNumExamplesAct(getNumExamples());
428
429    // initialize centroids
430    m_centroids       = new double[getNumCentroids()][getNumAttributes()];
431    m_centroidClasses = new int[getNumCentroids()];
432    m_centroidWeights = new double[getNumCentroids()];
433    m_centroidStdDevs = new double[getNumCentroids()];
434
435    for (i = 0; i < getNumCentroids(); i++) {
436      for (j = 0; j < getNumAttributes(); j++)
437        m_centroids[i][j] = rand.nextDouble();
438      m_centroidClasses[i] = rand.nextInt(getNumClasses());
439      m_centroidWeights[i] = rand.nextDouble();
440      m_centroidStdDevs[i] = rand.nextDouble();
441    }
442
443    // initialize dataset format
444    atts = new FastVector();
445    for (i = 0; i < getNumAttributes(); i++)
446      atts.addElement(new Attribute("a" + i));
447
448    clsValues = new FastVector();
449    for (i = 0; i < getNumClasses(); i++)
450      clsValues.addElement("c" + i);
451    atts.addElement(new Attribute("class", clsValues));
452   
453    m_DatasetFormat = new Instances(getRelationNameToUse(), atts, 0);
454   
455    return m_DatasetFormat;
456  }
457
458  /**
459   * Generates one example of the dataset.
460   *
461   * @return the generated example
462   * @throws Exception if the format of the dataset is not yet defined
463   * @throws Exception if the generator only works with generateExamples
464   * which means in non single mode
465   */
466  public Instance generateExample() throws Exception {
467    Instance    result;
468    int         centroid;
469    double[]    atts;
470    double      magnitude;
471    double      desiredMag;
472    double      scale;
473    int         i;
474    double      label;
475    Random      rand;
476
477    result = null;
478    rand   = getRandom();
479
480    if (m_DatasetFormat == null)
481      throw new Exception("Dataset format not defined.");
482
483    // generate class label based on class probs
484    centroid = chooseRandomIndexBasedOnProportions(m_centroidWeights, rand);
485    label    = m_centroidClasses[centroid];
486
487    // generate attributes
488    atts = new double[getNumAttributes() + 1];
489    for (i = 0; i < getNumAttributes(); i++)
490      atts[i] = (rand.nextDouble() * 2.0) - 1.0;
491    atts[atts.length - 1] = label;
492   
493    magnitude = 0.0;
494    for (i = 0; i < getNumAttributes(); i++)
495      magnitude += atts[i] * atts[i];
496   
497    magnitude  = Math.sqrt(magnitude);
498    desiredMag = rand.nextGaussian() * m_centroidStdDevs[centroid];
499    scale      = desiredMag / magnitude;
500    for (i = 0; i < getNumAttributes(); i++) {
501      atts[i] *= scale;
502      atts[i] += m_centroids[centroid][i];
503      result   = new DenseInstance(1.0, atts);
504    }
505
506    // dataset reference
507    result.setDataset(m_DatasetFormat);
508   
509    return result;
510  }
511
512  /**
513   * Generates all examples of the dataset. Re-initializes the random number
514   * generator with the given seed, before generating instances.
515   *
516   * @return the generated dataset
517   * @throws Exception if the format of the dataset is not yet defined
518   * @throws Exception if the generator only works with generateExample,
519   * which means in single mode
520   * @see   #getSeed()
521   */
522  public Instances generateExamples() throws Exception {
523    Instances       result;
524    int             i;
525
526    result   = new Instances(m_DatasetFormat, 0);
527    m_Random = new Random(getSeed());
528
529    for (i = 0; i < getNumExamplesAct(); i++)
530      result.add(generateExample());
531   
532    return result;
533  }
534
535  /**
536   * Generates a comment string that documentates the data generator.
537   * By default this string is added at the beginning of the produced output
538   * as ARFF file type, next after the options.
539   *
540   * @return string contains info about the generated rules
541   */
542  public String generateStart () {
543    StringBuffer        result;
544    int                 i;
545
546    result = new StringBuffer();
547
548    result.append("%\n");
549    result.append("% centroids:\n");
550    for (i = 0; i < getNumCentroids(); i++)
551      result.append(
552          "% " + i + ".: " + Utils.arrayToString(m_centroids[i]) + "\n");
553    result.append("%\n");
554    result.append(
555        "% centroidClasses: " + Utils.arrayToString(m_centroidClasses) + "\n");
556    result.append("%\n");
557    result.append(
558        "% centroidWeights: " + Utils.arrayToString(m_centroidWeights) + "\n");
559    result.append("%\n");
560    result.append(
561        "% centroidStdDevs: " + Utils.arrayToString(m_centroidStdDevs) + "\n");
562    result.append("%\n");
563   
564    return result.toString();
565  }
566
567  /**
568   * Generates a comment string that documentats the data generator.
569   * By default this string is added at the end of theproduces output
570   * as ARFF file type.
571   *
572   * @return string contains info about the generated rules
573   * @throws Exception if the generating of the documentaion fails
574   */
575  public String generateFinished() throws Exception {
576    return "";
577  }
578 
579  /**
580   * Returns the revision string.
581   *
582   * @return            the revision
583   */
584  public String getRevision() {
585    return RevisionUtils.extract("$Revision: 5987 $");
586  }
587
588  /**
589   * Main method for executing this class.
590   *
591   * @param args should contain arguments for the data producer:
592   */
593  public static void main(String[] args) {
594    runDataGenerator(new RandomRBF(), args);
595  }
596}
Note: See TracBrowser for help on using the repository browser.