source: src/main/java/weka/filters/unsupervised/attribute/PropositionalToMultiInstance.java @ 4

Last change on this file since 4 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 13.4 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * PropositionalToMultiInstance.java
19 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.filters.unsupervised.attribute;
24
25import weka.core.Attribute;
26import weka.core.Capabilities;
27import weka.core.FastVector;
28import weka.core.Instance; 
29import weka.core.DenseInstance;
30import weka.core.Instances;
31import weka.core.Option;
32import weka.core.OptionHandler;
33import weka.core.RelationalLocator;
34import weka.core.RevisionUtils;
35import weka.core.StringLocator;
36import weka.core.Utils;
37import weka.core.Capabilities.Capability;
38import weka.filters.Filter;
39import weka.filters.UnsupervisedFilter;
40
41import java.util.Enumeration;
42import java.util.Random;
43import java.util.Vector;
44
45/**
46 <!-- globalinfo-start -->
47 * Converts the propositional instance dataset into multi-instance dataset (with relational attribute). When normalize or standardize a multi-instance dataset, a MIToSingleInstance filter can be applied first to convert the multi-instance dataset into propositional instance dataset. After normalization or standardization, may use this PropositionalToMultiInstance filter to convert the data back to multi-instance format.<br/>
48 * <br/>
49 * Note: the first attribute of the original propositional instance dataset must be a nominal attribute which is expected to be bagId attribute.
50 * <p/>
51 <!-- globalinfo-end -->
52 *
53 <!-- options-start -->
54 * Valid options are: <p/>
55 *
56 * <pre> -S &lt;num&gt;
57 *  The seed for the randomization of the order of bags. (default 1)</pre>
58 *
59 * <pre> -R
60 *  Randomizes the order of the produced bags after the generation. (default off)</pre>
61 *
62 <!-- options-end -->
63 *
64 * @author Lin Dong (ld21@cs.waikato.ac.nz)
65 * @version $Revision: 5987 $
66 * @see MultiInstanceToPropositional
67 */
68public class PropositionalToMultiInstance 
69  extends Filter
70  implements OptionHandler, UnsupervisedFilter {
71
72  /** for serialization */
73  private static final long serialVersionUID = 5825873573912102482L;
74
75  /** the seed for randomizing, default is 1 */
76  protected int m_Seed = 1;
77 
78  /** whether to randomize the output data */
79  protected boolean m_Randomize = false;
80
81  /** Indices of string attributes in the bag */
82  protected StringLocator m_BagStringAtts = null;
83
84  /** Indices of relational attributes in the bag */
85  protected RelationalLocator m_BagRelAtts = null;
86 
87  /**
88   * Returns a string describing this filter
89   *
90   * @return a description of the filter suitable for
91   * displaying in the explorer/experimenter gui
92   */
93  public String globalInfo() {
94    return 
95        "Converts the propositional instance dataset into multi-instance "
96      + "dataset (with relational attribute). When normalize or standardize a "
97      + "multi-instance dataset, a MIToSingleInstance filter can be applied "
98      + "first to convert the multi-instance dataset into propositional "
99      + "instance dataset. After normalization or standardization, may use "
100      + "this PropositionalToMultiInstance filter to convert the data back to "
101      + "multi-instance format.\n\n"
102      + "Note: the first attribute of the original propositional instance "
103      + "dataset must be a nominal attribute which is expected to be bagId "
104      + "attribute.";
105
106  }
107
108  /**
109   * Returns an enumeration describing the available options
110   *
111   * @return an enumeration of all the available options
112   */
113  public Enumeration listOptions() {
114    Vector result = new Vector();
115 
116    result.addElement(new Option(
117        "\tThe seed for the randomization of the order of bags."
118        + "\t(default 1)",
119        "S", 1, "-S <num>"));
120 
121    result.addElement(new Option(
122        "\tRandomizes the order of the produced bags after the generation."
123        + "\t(default off)",
124        "R", 0, "-R"));
125 
126    return result.elements();
127  }
128
129
130  /**
131   * Parses a given list of options. <p/>
132   *
133   <!-- options-start -->
134   * Valid options are: <p/>
135   *
136   * <pre> -S &lt;num&gt;
137   *  The seed for the randomization of the order of bags. (default 1)</pre>
138   *
139   * <pre> -R
140   *  Randomizes the order of the produced bags after the generation. (default off)</pre>
141   *
142   <!-- options-end -->
143   *
144   * @param options the list of options as an array of strings
145   * @throws Exception if an option is not supported
146   */
147  public void setOptions(String[] options) throws Exception {
148    String        tmpStr;
149   
150    setRandomize(Utils.getFlag('R', options));
151   
152    tmpStr = Utils.getOption('S', options);
153    if (tmpStr.length() != 0)
154      setSeed(Integer.parseInt(tmpStr));
155    else
156      setSeed(1);
157  }
158
159  /**
160   * Gets the current settings of the classifier.
161   *
162   * @return an array of strings suitable for passing to setOptions
163   */
164  public String [] getOptions() {
165    Vector        result;
166   
167    result = new Vector();
168   
169    result.add("-S");
170    result.add("" + getSeed());
171   
172    if (m_Randomize)
173      result.add("-R");
174
175    return (String[]) result.toArray(new String[result.size()]);
176  }
177
178  /**
179   * Returns the tip text for this property
180   *
181   * @return            tip text for this property suitable for
182   *                    displaying in the explorer/experimenter gui
183   */
184  public String seedTipText() {
185    return "The random seed used by the random number generator";
186  }
187
188  /**
189   * Sets the new seed for randomizing the order of the generated data
190   *
191   * @param value     the new seed value
192   */
193  public void setSeed(int value) {
194    m_Seed = value;
195  }
196 
197  /**
198   * Returns the current seed value for randomizing the order of the generated
199   * data
200   *
201   * @return          the current seed value
202   */
203  public int getSeed() {
204    return m_Seed;
205  }
206 
207  /**
208   * Sets whether the order of the generated data is randomized
209   *
210   * @param value     whether to randomize or not
211   */
212  public void setRandomize(boolean value) {
213    m_Randomize = value;
214  }
215 
216  /**
217   * Gets whether the order of the generated is randomized
218   *
219   * @return      true if the order is randomized
220   */
221  public boolean getRandomize() {
222    return m_Randomize;
223  }
224
225  /**
226   * Returns the tip text for this property
227   *
228   * @return tip text for this property suitable for
229   * displaying in the explorer/experimenter gui
230   */
231  public String randomizeTipText() {
232    return "Whether the order of the generated data is randomized.";
233  }
234
235  /**
236   * Returns the Capabilities of this filter.
237   *
238   * @return            the capabilities of this object
239   * @see               Capabilities
240   */
241  public Capabilities getCapabilities() {
242    Capabilities result = super.getCapabilities();
243    result.disableAll();
244
245    // attributes
246    result.enable(Capability.NOMINAL_ATTRIBUTES);
247    result.enable(Capability.NUMERIC_ATTRIBUTES);
248    result.enable(Capability.DATE_ATTRIBUTES);
249    result.enable(Capability.STRING_ATTRIBUTES);
250    result.enable(Capability.MISSING_VALUES);
251   
252    // class
253    result.enableAllClasses();
254    result.enable(Capability.MISSING_CLASS_VALUES);
255    result.enable(Capability.NO_CLASS);
256   
257    return result;
258  }
259 
260  /**
261   * Sets the format of the input instances.
262   *
263   * @param instanceInfo an Instances object containing the input
264   * instance structure (any instances contained in the object are
265   * ignored - only the structure is required).
266   * @return true if the outputFormat may be collected immediately
267   * @throws Exception if the input format can't be set
268   * successfully
269   */
270  public boolean setInputFormat(Instances instanceInfo) 
271    throws Exception {
272
273    if (instanceInfo.attribute(0).type()!= Attribute.NOMINAL) {
274      throw new Exception("The first attribute type of the original propositional instance dataset must be Nominal!");
275    }
276    super.setInputFormat(instanceInfo);
277
278    /* create a new output format (multi-instance format) */
279    Instances newData = instanceInfo.stringFreeStructure();
280    Attribute attBagIndex = (Attribute) newData.attribute(0).copy();
281    Attribute attClass = (Attribute) newData.classAttribute().copy();
282    // remove the bagIndex attribute
283    newData.deleteAttributeAt(0);
284    // remove the class attribute
285    newData.setClassIndex(-1);
286    newData.deleteAttributeAt(newData.numAttributes() - 1);
287
288    FastVector attInfo = new FastVector(3); 
289    attInfo.addElement(attBagIndex);
290    attInfo.addElement(new Attribute("bag", newData)); // relation-valued attribute
291    attInfo.addElement(attClass);
292    Instances data = new Instances("Multi-Instance-Dataset", attInfo, 0); 
293    data.setClassIndex(data.numAttributes() - 1);
294
295    super.setOutputFormat(data.stringFreeStructure());
296
297    m_BagStringAtts = new StringLocator(data.attribute(1).relation());
298    m_BagRelAtts    = new RelationalLocator(data.attribute(1).relation());
299   
300    return true;
301  }
302
303  /**
304   * adds a new bag out of the given data and adds it to the output
305   *
306   * @param input       the intput dataset
307   * @param output      the dataset this bag is added to
308   * @param bagInsts    the instances in this bag
309   * @param bagIndex    the bagIndex of this bag
310   * @param classValue  the associated class value
311   * @param bagWeight   the weight of the bag
312   */
313  protected void addBag(
314      Instances input,
315      Instances output,
316      Instances bagInsts, 
317      int bagIndex, 
318      double classValue, 
319      double bagWeight) {
320   
321    // copy strings/relational values
322    for (int i = 0; i < bagInsts.numInstances(); i++) {
323      RelationalLocator.copyRelationalValues(
324          bagInsts.instance(i), false, 
325          input, m_InputRelAtts,
326          bagInsts, m_BagRelAtts);
327
328      StringLocator.copyStringValues(
329          bagInsts.instance(i), false, 
330          input, m_InputStringAtts,
331          bagInsts, m_BagStringAtts);
332    }
333   
334    int value = output.attribute(1).addRelation(bagInsts);
335    Instance newBag = new DenseInstance(output.numAttributes());       
336    newBag.setValue(0, bagIndex);
337    newBag.setValue(2, classValue);
338    newBag.setValue(1, value);
339    newBag.setWeight(bagWeight);
340    newBag.setDataset(output);
341    output.add(newBag);
342  }
343
344  /**
345   * Adds an output instance to the queue. The derived class should use this
346   * method for each output instance it makes available.
347   *
348   * @param instance the instance to be added to the queue.
349   */
350  protected void push(Instance instance) {
351    if (instance != null) {
352      super.push(instance);
353      // set correct references
354    }
355  }
356 
357  /**
358   * Signify that this batch of input to the filter is finished.
359   * If the filter requires all instances prior to filtering,
360   * output() may now be called to retrieve the filtered instances.
361   *
362   * @return true if there are instances pending output
363   * @throws IllegalStateException if no input structure has been defined
364   */
365  public boolean batchFinished() {
366
367    if (getInputFormat() == null) {
368      throw new IllegalStateException("No input instance format defined");
369    }
370
371    Instances input = getInputFormat();
372    input.sort(0);   // make sure that bagID is sorted
373    Instances output = getOutputFormat();
374    Instances bagInsts = output.attribute(1).relation();
375    Instance inst = new DenseInstance(bagInsts.numAttributes());
376    inst.setDataset(bagInsts);
377
378    double bagIndex   = input.instance(0).value(0);
379    double classValue = input.instance(0).classValue(); 
380    double bagWeight  = 0.0;
381
382    // Convert pending input instances
383    for(int i = 0; i < input.numInstances(); i++) {
384      double currentBagIndex = input.instance(i).value(0);
385
386      // copy the propositional instance value, except the bagIndex and the class value
387      for (int j = 0; j < input.numAttributes() - 2; j++) 
388        inst.setValue(j, input.instance(i).value(j + 1));
389      inst.setWeight(input.instance(i).weight());
390
391      if (currentBagIndex == bagIndex){
392        bagInsts.add(inst);
393        bagWeight += inst.weight();
394      }
395      else{
396        addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight);
397
398        bagInsts   = bagInsts.stringFreeStructure(); 
399        bagInsts.add(inst);
400        bagIndex   = currentBagIndex;
401        classValue = input.instance(i).classValue();
402        bagWeight  = inst.weight();
403      }
404    }
405
406    // reach the last instance, create and add the last bag
407    addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight);
408
409    if (getRandomize())
410      output.randomize(new Random(getSeed()));
411   
412    for (int i = 0; i < output.numInstances(); i++)
413      push(output.instance(i));
414   
415    // Free memory
416    flushInput();
417
418    m_NewBatch = true;
419    m_FirstBatchDone = true;
420   
421    return (numPendingOutput() != 0);
422  }
423 
424  /**
425   * Returns the revision string.
426   *
427   * @return            the revision
428   */
429  public String getRevision() {
430    return RevisionUtils.extract("$Revision: 5987 $");
431  }
432
433  /**
434   * Main method for running this filter.
435   *
436   * @param args should contain arguments to the filter:
437   * use -h for help
438   */
439  public static void main(String[] args) {
440    runFilter(new PropositionalToMultiInstance(), args);
441  }
442}
Note: See TracBrowser for help on using the repository browser.