source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/MathExpression.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 17.2 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    MathExpression.java
19 *    Copyright (C) 2004 Prados Julien
20 *    Copyright (C) 2002 University of Waikato, Hamilton, New Zealand
21 */
22
23package weka.filters.unsupervised.attribute;
24
25import weka.core.AttributeStats;
26import weka.core.Capabilities;
27import weka.core.Instance; 
28import weka.core.DenseInstance;
29import weka.core.Instances;
30import weka.core.MathematicalExpression;
31import weka.core.Option;
32import weka.core.Range;
33import weka.core.RevisionUtils;
34import weka.core.SparseInstance;
35import weka.core.Utils;
36import weka.core.Capabilities.Capability;
37import weka.core.mathematicalexpression.Parser;
38import weka.core.mathematicalexpression.Scanner;
39import java_cup.runtime.DefaultSymbolFactory;
40import java_cup.runtime.SymbolFactory;
41import weka.filters.UnsupervisedFilter;
42
43import java.io.ByteArrayInputStream;
44import java.util.Enumeration;
45import java.util.HashMap;
46import java.util.Vector;
47
48/**
49 <!-- globalinfo-start -->
50 * Modify numeric attributes according to a given expression
51 * <p/>
52 <!-- globalinfo-end -->
53 *
54 <!-- options-start -->
55 * Valid options are: <p/>
56 *
57 * <pre> -unset-class-temporarily
58 *  Unsets the class index temporarily before the filter is
59 *  applied to the data.
60 *  (default: no)</pre>
61 *
62 * <pre> -E &lt;expression&gt;
63 *  Specify the expression to apply. Eg. pow(A,6)/(MEAN+MAX)
64 *  Supported operators are +, -, *, /, pow, log,
65 *  abs, cos, exp, sqrt, tan, sin, ceil, floor, rint, (, ),
66 *  MEAN, MAX, MIN, SD, COUNT, SUM, SUMSQUARED, ifelse</pre>
67 *
68 * <pre> -R &lt;index1,index2-index4,...&gt;
69 *  Specify list of columns to ignore. First and last are valid
70 *  indexes. (default none)</pre>
71 *
72 * <pre> -V
73 *  Invert matching sense (i.e. only modify specified columns)</pre>
74 *
75 <!-- options-end -->
76 *
77 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
78 * @author Prados Julien (julien.prados@cui.unige.ch)
79 * @version $Revision: 5987 $
80 * @see MathematicalExpression
81 */
82public class MathExpression 
83  extends PotentialClassIgnorer
84  implements UnsupervisedFilter {
85 
86  /** for serialization */
87  static final long serialVersionUID = -3713222714671997901L;
88 
89  /** Stores which columns to select as a funky range */
90  protected Range m_SelectCols = new Range();
91   
92  /** The default modification expression */
93  public static final String m_defaultExpression = "(A-MIN)/(MAX-MIN)";
94
95  /** The modification expression */
96  private String m_expression = m_defaultExpression;
97 
98  /** Attributes statistics */
99  private AttributeStats[] m_attStats;
100 
101  /**
102   * Constructor
103   */
104  public MathExpression() {
105    super();
106    setInvertSelection(false);
107  } 
108 
109  /**
110   * Returns a string describing this filter
111   *
112   * @return a description of the filter suitable for
113   * displaying in the explorer/experimenter gui
114   */
115  public String globalInfo() {
116
117    return "Modify numeric attributes according to a given expression ";
118  }
119
120  /**
121   * Returns the Capabilities of this filter.
122   *
123   * @return            the capabilities of this object
124   * @see               Capabilities
125   */
126  public Capabilities getCapabilities() {
127    Capabilities result = super.getCapabilities();
128    result.disableAll();
129
130    // attributes
131    result.enableAllAttributes();
132    result.enable(Capability.MISSING_VALUES);
133   
134    // class
135    result.enableAllClasses();
136    result.enable(Capability.MISSING_CLASS_VALUES);
137    result.enable(Capability.NO_CLASS);
138   
139    return result;
140  }
141 
142  /**
143   * Sets the format of the input instances.
144   *
145   * @param instanceInfo an Instances object containing the input
146   * instance structure (any instances contained in the object are
147   * ignored - only the structure is required).
148   * @return true if the outputFormat may be collected immediately
149   * @throws Exception if the input format can't be set
150   * successfully
151   */
152  public boolean setInputFormat(Instances instanceInfo) 
153       throws Exception {
154    m_SelectCols.setUpper(instanceInfo.numAttributes() - 1);
155    super.setInputFormat(instanceInfo);
156    setOutputFormat(instanceInfo);
157    m_attStats = null;
158    return true;
159  }
160
161  /**
162   * Input an instance for filtering. Filter requires all
163   * training instances be read before producing output.
164   *
165   * @param instance the input instance
166   * @return true if the filtered instance may now be
167   * collected with output().
168   * @throws IllegalStateException if no input format has been set.
169   */
170  public boolean input(Instance instance) throws Exception {
171
172    if (getInputFormat() == null) {
173      throw new IllegalStateException("No input instance format defined");
174    }
175    if (m_NewBatch) {
176      resetQueue();
177      m_NewBatch = false;
178    }
179    if (m_attStats == null) {
180      bufferInput(instance);
181      return false;
182    } else {
183      convertInstance(instance);
184      return true;
185    }
186  }
187
188  /**
189   * Signify that this batch of input to the filter is finished.
190   * If the filter requires all instances prior to filtering,
191   * output() may now be called to retrieve the filtered instances.
192   *
193   * @return true if there are instances pending output
194   * @throws IllegalStateException if no input structure has been defined
195   */
196  public boolean batchFinished() throws Exception {
197
198    if (getInputFormat() == null) {
199      throw new IllegalStateException("No input instance format defined");
200    }
201    if (m_attStats == null) {
202      Instances input = getInputFormat();
203
204      m_attStats = new AttributeStats [input.numAttributes()];
205     
206      for (int i = 0; i < input.numAttributes(); i++) {
207        if (input.attribute(i).isNumeric() &&
208            (input.classIndex() != i)) {
209          m_attStats[i] = input.attributeStats(i);
210        }
211      }
212
213      // Convert pending input instances
214      for(int i = 0; i < input.numInstances(); i++) {
215        convertInstance(input.instance(i));
216      }
217    } 
218    // Free memory
219    flushInput();
220
221    m_NewBatch = true;
222    return (numPendingOutput() != 0);
223  }
224 
225  /**
226   * Evaluates the symbols.
227   *
228   * @param symbols     the symbols to use for evaluation
229   * @return            the calculated value, Double.NaN in case of an error
230   */
231  protected double eval(HashMap symbols) {
232    SymbolFactory               sf;
233    ByteArrayInputStream        parserInput;
234    Parser                      parser;
235    double                      result;
236   
237    try {
238      sf          = new DefaultSymbolFactory();
239      parserInput = new ByteArrayInputStream(m_expression.getBytes());
240      parser      = new Parser(new Scanner(parserInput, sf), sf);
241      parser.setSymbols(symbols);
242      parser.parse();
243      result = parser.getResult();
244    }
245    catch (Exception e) {
246      result = Double.NaN;
247      e.printStackTrace();
248    }
249   
250    return result;
251  }
252 
253  /**
254   * Convert a single instance over. The converted instance is
255   * added to the end of the output queue.
256   *
257   * @param instance the instance to convert
258   * @throws Exception if instance cannot be converted
259   */
260  private void convertInstance(Instance instance) throws Exception {
261 
262    Instance inst = null;
263    HashMap symbols = new HashMap(5);
264    if (instance instanceof SparseInstance) {
265      double[] newVals = new double[instance.numAttributes()];
266      int[] newIndices = new int[instance.numAttributes()];
267      double[] vals = instance.toDoubleArray();
268      int ind = 0;
269      double value;
270      for (int j = 0; j < instance.numAttributes(); j++) {
271        if (m_SelectCols.isInRange(j)) {         
272          if (instance.attribute(j).isNumeric() &&
273            (!Utils.isMissingValue(vals[j])) &&
274            (getInputFormat().classIndex() != j)) {
275              symbols.put("A", new Double(vals[j])); 
276              symbols.put("MAX", new Double(m_attStats[j].numericStats.max));
277              symbols.put("MIN", new Double(m_attStats[j].numericStats.min));
278              symbols.put("MEAN", new Double(m_attStats[j].numericStats.mean));
279              symbols.put("SD", new Double(m_attStats[j].numericStats.stdDev));
280              symbols.put("COUNT", new Double(m_attStats[j].numericStats.count));
281              symbols.put("SUM", new Double(m_attStats[j].numericStats.sum));
282              symbols.put("SUMSQUARED", new Double(m_attStats[j].numericStats.sumSq));
283              value = eval(symbols);
284              if (Double.isNaN(value) || Double.isInfinite(value)) {
285                  System.err.println("WARNING:Error in evaluating the expression: missing value set");
286                  value = Utils.missingValue();
287              }
288              if (value != 0.0) {
289                newVals[ind] = value;
290                newIndices[ind] = j;
291                ind++;
292              }
293             
294          }
295        } else {
296          value = vals[j];
297          if (value != 0.0) {
298            newVals[ind] = value;
299            newIndices[ind] = j;
300            ind++;
301          }
302        }
303      } 
304      double[] tempVals = new double[ind];
305      int[] tempInd = new int[ind];
306      System.arraycopy(newVals, 0, tempVals, 0, ind);
307      System.arraycopy(newIndices, 0, tempInd, 0, ind);
308      inst = new SparseInstance(instance.weight(), tempVals, tempInd,
309                                instance.numAttributes());
310    } else {
311      double[] vals = instance.toDoubleArray();
312      for (int j = 0; j < getInputFormat().numAttributes(); j++) {
313        if (m_SelectCols.isInRange(j)) {
314          if (instance.attribute(j).isNumeric() &&
315              (!Utils.isMissingValue(vals[j])) &&
316              (getInputFormat().classIndex() != j)) {
317              symbols.put("A", new Double(vals[j])); 
318              symbols.put("MAX", new Double(m_attStats[j].numericStats.max));
319              symbols.put("MIN", new Double(m_attStats[j].numericStats.min));
320              symbols.put("MEAN", new Double(m_attStats[j].numericStats.mean));
321              symbols.put("SD", new Double(m_attStats[j].numericStats.stdDev));
322              symbols.put("COUNT", new Double(m_attStats[j].numericStats.count));
323              symbols.put("SUM", new Double(m_attStats[j].numericStats.sum));
324              symbols.put("SUMSQUARED", new Double(m_attStats[j].numericStats.sumSq));
325              vals[j] = eval(symbols);
326              if (Double.isNaN(vals[j]) || Double.isInfinite(vals[j])) {
327                  System.err.println("WARNING:Error in Evaluation the Expression: missing value set");
328                  vals[j] = Utils.missingValue();
329              }
330          }
331        }
332      }
333      inst = new DenseInstance(instance.weight(), vals);
334    }
335    inst.setDataset(instance.dataset());
336    push(inst);
337  }
338
339  /**
340   * Parses a given list of options. <p/>
341   *
342   <!-- options-start -->
343   * Valid options are: <p/>
344   *
345   * <pre> -unset-class-temporarily
346   *  Unsets the class index temporarily before the filter is
347   *  applied to the data.
348   *  (default: no)</pre>
349   *
350   * <pre> -E &lt;expression&gt;
351   *  Specify the expression to apply. Eg. pow(A,6)/(MEAN+MAX)
352   *  Supported operators are +, -, *, /, pow, log,
353   *  abs, cos, exp, sqrt, tan, sin, ceil, floor, rint, (, ),
354   *  MEAN, MAX, MIN, SD, COUNT, SUM, SUMSQUARED, ifelse</pre>
355   *
356   * <pre> -R &lt;index1,index2-index4,...&gt;
357   *  Specify list of columns to ignore. First and last are valid
358   *  indexes. (default none)</pre>
359   *
360   * <pre> -V
361   *  Invert matching sense (i.e. only modify specified columns)</pre>
362   *
363   <!-- options-end -->
364   *
365   * @param options the list of options as an array of strings
366   * @throws Exception if an option is not supported
367   */
368  public void setOptions(String[] options) throws Exception {
369    super.setOptions(options);
370
371    String expString = Utils.getOption('E', options);
372    if (expString.length() != 0) {
373      setExpression(expString);
374    } else {
375      setExpression(m_defaultExpression);
376    }
377   
378    String ignoreList = Utils.getOption('R', options);
379    if (ignoreList.length() != 0) {
380      setIgnoreRange(ignoreList);
381    }
382
383    setInvertSelection(Utils.getFlag('V', options));
384  }
385 
386  /**
387   * Gets the current settings of the filter.
388   *
389   * @return an array of strings suitable for passing to setOptions
390   */
391  public String [] getOptions() {
392    Vector        result;
393    String[]      options;
394    int           i;
395
396    result = new Vector();
397
398    options = super.getOptions();
399    for (i = 0; i < options.length; i++)
400      result.add(options[i]);
401
402    result.add("-E");
403    result.add(getExpression());
404   
405    if (getInvertSelection())
406      result.add("-V");
407
408    if (!getIgnoreRange().equals("")) {
409      result.add("-R");
410      result.add(getIgnoreRange());
411    }
412
413    return (String[]) result.toArray(new String[result.size()]);
414  }
415 
416  /**
417   * Returns an enumeration describing the available options.
418   *
419   * @return an enumeration of all the available options.
420   */
421  public Enumeration listOptions() {
422    Vector result = new Vector();
423    Enumeration enm = super.listOptions();
424    while (enm.hasMoreElements())
425      result.add(enm.nextElement());
426     
427    result.addElement(new Option(
428        "\tSpecify the expression to apply. Eg. pow(A,6)/(MEAN+MAX)"
429        +"\n\tSupported operators are +, -, *, /, pow, log,"
430        +"\n\tabs, cos, exp, sqrt, tan, sin, ceil, floor, rint, (, ), "
431        +"\n\tMEAN, MAX, MIN, SD, COUNT, SUM, SUMSQUARED, ifelse",
432        "E",1,"-E <expression>"));
433   
434    result.addElement(new Option(
435        "\tSpecify list of columns to ignore. First and last are valid\n"
436        +"\tindexes. (default none)",
437        "R", 1, "-R <index1,index2-index4,...>"));
438   
439    result.addElement(new Option(
440        "\tInvert matching sense (i.e. only modify specified columns)",
441        "V", 0, "-V"));
442   
443    return result.elements();
444  }
445 
446  /**
447   * Returns the tip text for this property
448   *
449   * @return tip text for this property suitable for
450   * displaying in the explorer/experimenter gui
451   */
452  public String expressionTipText() {
453    return "Specify the expression to apply. The 'A' letter"
454             + "refers to the attribute value. MIN,MAX,MEAN,SD"
455             + "refer respectively to minimum, maximum, mean and"
456             + "standard deviation of the attribute."
457             +"\n\tSupported operators are +, -, *, /, pow, log,"
458             +"abs, cos, exp, sqrt, tan, sin, ceil, floor, rint, (, ),"
459             +"A,MEAN, MAX, MIN, SD, COUNT, SUM, SUMSQUARED, ifelse"
460             +"\n\tEg. pow(A,6)/(MEAN+MAX)*ifelse(A<0,0,sqrt(A))+ifelse(![A>9 && A<15])";
461  }
462 
463  /**
464   * Set the expression to apply
465   * @param expr a mathematical expression to apply
466   */
467  public void setExpression(String expr) {
468    m_expression = expr;
469  }
470
471  /**
472   * Get the expression
473   * @return the expression
474   */
475  public String getExpression() {
476    return m_expression;
477  }
478 
479    /**
480   * Returns the tip text for this property
481   *
482   * @return tip text for this property suitable for
483   * displaying in the explorer/experimenter gui
484   */
485  public String invertSelectionTipText() {
486
487    return "Determines whether action is to select or unselect."
488      + " If set to true, only the specified attributes will be modified;"
489      + " If set to false, specified attributes will not be modified.";
490  }
491
492  /**
493   * Get whether the supplied columns are to be select or unselect
494   *
495   * @return true if the supplied columns will be kept
496   */
497  public boolean getInvertSelection() {
498
499    return !m_SelectCols.getInvert();
500  }
501
502  /**
503   * Set whether selected columns should be select or unselect. If true the
504   * selected columns are modified. If false the selected columns are not
505   * modified.
506   *
507   * @param invert the new invert setting
508   */
509  public void setInvertSelection(boolean invert) {
510
511    m_SelectCols.setInvert(!invert);
512  }
513
514  /**
515   * Returns the tip text for this property
516   *
517   * @return tip text for this property suitable for
518   * displaying in the explorer/experimenter gui
519   */
520  public String ignoreRangeTipText() {
521
522    return "Specify range of attributes to act on."
523      + " This is a comma separated list of attribute indices, with"
524      + " \"first\" and \"last\" valid values. Specify an inclusive"
525      + " range with \"-\". E.g: \"first-3,5,6-10,last\".";
526  }
527
528  /**
529   * Get the current range selection.
530   *
531   * @return a string containing a comma separated list of ranges
532   */
533  public String getIgnoreRange() {
534
535    return m_SelectCols.getRanges();
536  }
537
538  /**
539   * Set which attributes are to be ignored
540   *
541   * @param rangeList a string representing the list of attributes.  Since
542   * the string will typically come from a user, attributes are indexed from
543   * 1. <br/>
544   * eg: first-3,5,6-last
545   */
546  public void setIgnoreRange(String rangeList) {
547
548    m_SelectCols.setRanges(rangeList);
549  }
550 
551  /**
552   * Returns the revision string.
553   *
554   * @return            the revision
555   */
556  public String getRevision() {
557    return RevisionUtils.extract("$Revision: 5987 $");
558  }
559 
560  /**
561   * Main method for testing this class.
562   *
563   * @param argv should contain arguments to the filter:
564   * use -h for help
565   */
566  public static void main(String [] argv) {
567    runFilter(new MathExpression(), argv);
568  }
569}
Note: See TracBrowser for help on using the repository browser.